author    | Dimitry Andric <dim@FreeBSD.org> | 2024-01-09 19:58:18 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-09 19:58:18 +0000
commit    | aca2e42c67292825f835f094eb0c4df5ce6013db (patch)
tree      | 9cfb7eeef35545100c4f7219e794e6a0306ea6a6
parent    | 77dbea07356e1ab2f37a777d4d1ddc5dd3e301c2 (diff)
Vendor import of llvm-project main llvmorg-18-init-16595-g7c00a5be5cde.
vendor/llvm-project/llvmorg-18-init-16595-g7c00a5be5cde
687 files changed, 13952 insertions, 6464 deletions
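One user-visible addition in this import is a LoongArch-only ``model`` attribute for non-TLS global variables (see the Attr.td and AttrDocs.td hunks below), which overrides the translation unit's ``-mcmodel`` setting for a single global. A minimal usage sketch follows; it assumes a LoongArch target, and the variable names are illustrative rather than taken from the commit.

```c++
// Sketch only: the GCC-spelled 'model' attribute added in this import applies to
// non-TLS globals on LoongArch and accepts "normal", "medium", or "extreme".
#if defined(__loongarch__)
// Keep this global addressable with the default ("normal") code model.
int counter __attribute__((model("normal")));
// Force the "extreme" code model so this array may be placed arbitrarily far away.
long big_table[4096] __attribute__((model("extreme")));
#endif
```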
diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h index 1b831c9511e2..8e9b7ad8b468 100644 --- a/clang/include/clang/AST/Attr.h +++ b/clang/include/clang/AST/Attr.h @@ -25,6 +25,7 @@ #include "clang/Basic/Sanitizers.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Frontend/HLSL/HLSLResource.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/raw_ostream.h" diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 432293583576..984a4d8bab5e 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -2044,6 +2044,14 @@ public: // Implement isa/cast/dyncast/etc. static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == RequiresExprBody; } + + static DeclContext *castToDeclContext(const RequiresExprBodyDecl *D) { + return static_cast<DeclContext *>(const_cast<RequiresExprBodyDecl *>(D)); + } + + static RequiresExprBodyDecl *castFromDeclContext(const DeclContext *DC) { + return static_cast<RequiresExprBodyDecl *>(const_cast<DeclContext *>(DC)); + } }; /// Represents a static or instance method of a struct/union/class. diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 2f4ed082a0c7..732749ad305e 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -252,6 +252,8 @@ public: void VisitGotoStmt(const GotoStmt *Node); void VisitCaseStmt(const CaseStmt *Node); void VisitReturnStmt(const ReturnStmt *Node); + void VisitCoawaitExpr(const CoawaitExpr *Node); + void VisitCoreturnStmt(const CoreturnStmt *Node); void VisitCompoundStmt(const CompoundStmt *Node); void VisitConstantExpr(const ConstantExpr *Node); void VisitCallExpr(const CallExpr *Node); diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 1afa69367286..9e9f896ebef7 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -4224,6 +4224,8 @@ public: ExceptionSpecInfo() = default; ExceptionSpecInfo(ExceptionSpecificationType EST) : Type(EST) {} + + void instantiate(); }; /// Extra information about a function prototype. ExtProtoInfo is not diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h index 8a2d56668e32..b28f2c6b99c5 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h @@ -66,7 +66,7 @@ public: /// Invoked when an unsafe operation over raw pointers is found. virtual void handleUnsafeOperation(const Stmt *Operation, - bool IsRelatedToDecl) = 0; + bool IsRelatedToDecl, ASTContext &Ctx) = 0; /// Invoked when a fix is suggested against a variable. 
This function groups /// all variables that must be fixed together (i.e their types must be changed diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 757ee452ced7..c97661688365 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -30,6 +30,7 @@ WARNING_GADGET(Decrement) WARNING_GADGET(ArraySubscript) WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) +WARNING_GADGET(DataInvocation) FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context FIXABLE_GADGET(DerefSimplePtrArithFixable) FIXABLE_GADGET(PointerDereference) diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h index 67383bb316d3..9f776ca6cc26 100644 --- a/clang/include/clang/Analysis/CFG.h +++ b/clang/include/clang/Analysis/CFG.h @@ -1215,7 +1215,9 @@ public: //===--------------------------------------------------------------------===// class BuildOptions { - std::bitset<Stmt::lastStmtConstant> alwaysAddMask; + // Stmt::lastStmtConstant has the same value as the last Stmt kind, + // so make sure we add one to account for this! + std::bitset<Stmt::lastStmtConstant + 1> alwaysAddMask; public: using ForcedBlkExprs = llvm::DenseMap<const Stmt *, const CFGBlock *>; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index db17211747b1..d5eabaad4889 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -143,6 +143,11 @@ def ExternalGlobalVar : SubsetSubject<Var, !S->isLocalExternDecl()}], "external global variables">; +def NonTLSGlobalVar : SubsetSubject<Var, + [{S->hasGlobalStorage() && + S->getTLSKind() == 0}], + "non-TLS global variables">; + def InlineFunction : SubsetSubject<Function, [{S->isInlineSpecified()}], "inline functions">; @@ -431,6 +436,7 @@ def TargetAArch64 : TargetArch<["aarch64", "aarch64_be", "aarch64_32"]>; def TargetAnyArm : TargetArch<!listconcat(TargetARM.Arches, TargetAArch64.Arches)>; def TargetAVR : TargetArch<["avr"]>; def TargetBPF : TargetArch<["bpfel", "bpfeb"]>; +def TargetLoongArch : TargetArch<["loongarch32", "loongarch64"]>; def TargetMips32 : TargetArch<["mips", "mipsel"]>; def TargetAnyMips : TargetArch<["mips", "mipsel", "mips64", "mips64el"]>; def TargetMSP430 : TargetArch<["msp430"]>; @@ -1121,6 +1127,14 @@ def CoroLifetimeBound : InheritableAttr { let SimpleHandler = 1; } +def CoroDisableLifetimeBound : InheritableAttr { + let Spellings = [Clang<"coro_disable_lifetimebound">]; + let Subjects = SubjectList<[Function]>; + let LangOpts = [CPlusPlus]; + let Documentation = [CoroLifetimeBoundDoc]; + let SimpleHandler = 1; +} + // OSObject-based attributes. 
def OSConsumed : InheritableParamAttr { let Spellings = [Clang<"os_consumed">]; @@ -2730,6 +2744,15 @@ def PragmaClangTextSection : InheritableAttr { let Documentation = [InternalOnly]; } +def CodeModel : InheritableAttr, TargetSpecificAttr<TargetLoongArch> { + let Spellings = [GCC<"model">]; + let Args = [EnumArgument<"Model", "llvm::CodeModel::Model", + ["normal", "medium", "extreme"], ["Small", "Medium", "Large"], + /*opt=*/0, /*fake=*/0, /*isExternalType=*/1>]; + let Subjects = SubjectList<[NonTLSGlobalVar], ErrorDiag>; + let Documentation = [CodeModelDocs]; +} + def Sentinel : InheritableAttr { let Spellings = [GCC<"sentinel">]; let Args = [DefaultIntArgument<"Sentinel", 0>, diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 98a7ecc7fd7d..5416a0cbdd07 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -57,6 +57,15 @@ global variable or function should be in after translation. let Heading = "section, __declspec(allocate)"; } +def CodeModelDocs : Documentation { + let Category = DocCatVariable; + let Content = [{ +The ``model`` attribute allows overriding the translation unit's +code model (specified by ``-mcmodel``) for a specific global variable. + }]; + let Heading = "model"; +} + def UsedDocs : Documentation { let Category = DocCatFunction; let Content = [{ @@ -7671,9 +7680,12 @@ The ``[[clang::coro_lifetimebound]]`` is a class attribute which can be applied to a coroutine return type (`CRT`_) (i.e. it should also be annotated with ``[[clang::coro_return_type]]``). -All parameters of a function are considered to be lifetime bound. See `documentation`_ -of ``[[clang::lifetimebound]]`` for more details. -if the function returns a coroutine return type (CRT) annotated with ``[[clang::coro_lifetimebound]]``. +All parameters of a function are considered to be lifetime bound if the function returns a +coroutine return type (CRT) annotated with ``[[clang::coro_lifetimebound]]``. +This lifetime bound analysis can be disabled for a coroutine wrapper or a coroutine by annotating the function +with ``[[clang::coro_disable_lifetimebound]]`` function attribute . +See `documentation`_ of ``[[clang::lifetimebound]]`` for details about lifetime bound analysis. + Reference parameters of a coroutine are susceptible to capturing references to temporaries or local variables. @@ -7703,7 +7715,7 @@ Both coroutines and coroutine wrappers are part of this analysis. }; Task<int> coro(const int& a) { co_return a + 1; } - Task<int> [[clang::coro_wrapper]] coro_wrapper(const int& a, const int& b) { + [[clang::coro_wrapper]] Task<int> coro_wrapper(const int& a, const int& b) { return a > b ? coro(a) : coro(b); } Task<int> temporary_reference() { @@ -7718,6 +7730,21 @@ Both coroutines and coroutine wrappers are part of this analysis. return coro(a); // warning: returning address of stack variable `a`. } +This analysis can be disabled for all calls to a particular function by annotating the function +with function attribute ``[[clang::coro_disable_lifetimebound]]``. +For example, this could be useful for coroutine wrappers which accept reference parameters +but do not pass them to the underlying coroutine or pass them by value. + +.. 
code-block:: c++ + + Task<int> coro(int a) { co_return a + 1; } + [[clang::coro_wrapper, clang::coro_disable_lifetimebound]] Task<int> coro_wrapper(const int& a) { + return coro(a + 1); + } + void use() { + auto task = coro_wrapper(1); // use of temporary is fine as the argument is not lifetime bound. + } + .. _`documentation`: https://clang.llvm.org/docs/AttributeReference.html#lifetimebound .. _`CRT`: https://clang.llvm.org/docs/AttributeReference.html#coro-return-type }]; diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 0acb5ae134ea..2c4fb6745bc1 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -209,6 +209,7 @@ CODEGENOPT(CoverageMapping , 1, 0) ///< Generate coverage mapping regions to ///< enable code coverage analysis. CODEGENOPT(DumpCoverageMapping , 1, 0) ///< Dump the generated coverage mapping ///< regions. +CODEGENOPT(MCDCCoverage , 1, 0) ///< Enable MC/DC code coverage criteria. /// If -fpcc-struct-return or -freg-struct-return is specified. ENUM_CODEGENOPT(StructReturnConvention, StructReturnConventionKind, 2, SRCK_Default) diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index 65a33f61a694..72952b08c04a 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -349,6 +349,9 @@ def warn_invalid_feature_combination : Warning< def warn_target_unrecognized_env : Warning< "mismatch between architecture and environment in target triple '%0'; did you mean '%1'?">, InGroup<InvalidCommandLineArgument>; +def warn_knl_knm_isa_support_removed : Warning< + "KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.">, + InGroup<DiagGroup<"knl-knm-isa-support-removed">>; // Source manager def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal; diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 676f1a62b49d..0a8a77fadbeb 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -786,4 +786,7 @@ def warn_android_unversioned_fallback : Warning< " directories will not be used in Clang 19. Provide a versioned directory" " for the target version or lower instead.">, InGroup<DiagGroup<"android-unversioned-fallback">>; + +def err_drv_triple_version_invalid : Error< + "version '%0' in target triple '%1' is invalid">; } diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index e4b1069cde18..088f8b74983c 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1364,6 +1364,8 @@ def err_acc_invalid_clause : Error<"invalid OpenACC clause %0">; def err_acc_missing_directive : Error<"expected OpenACC directive">; def err_acc_invalid_open_paren : Error<"expected clause-list or newline in OpenACC directive">; +def err_acc_invalid_default_clause_kind + : Error<"invalid value for 'default' clause; expected 'present' or 'none'">; // OpenMP support. 
def warn_pragma_omp_ignored : Warning< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index aebb7d9b945c..a97182cad5d5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3415,6 +3415,8 @@ def warn_objc_redundant_literal_use : Warning< def err_attr_tlsmodel_arg : Error<"tls_model must be \"global-dynamic\", " "\"local-dynamic\", \"initial-exec\" or \"local-exec\"">; +def err_attr_codemodel_arg : Error<"code model '%0' is not supported on this target">; + def err_aix_attr_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">; def err_tls_var_aligned_over_maximum : Error< @@ -6158,23 +6160,19 @@ def err_illegal_initializer_type : Error<"illegal initializer type %0">; def ext_init_list_type_narrowing : ExtWarn< "type %0 cannot be narrowed to %1 in initializer list">, InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure; -// *_narrowing_const_reference diagnostics have the same messages, but are -// controlled by -Wc++11-narrowing-const-reference for narrowing involving a -// const reference. def ext_init_list_type_narrowing_const_reference : ExtWarn< - "type %0 cannot be narrowed to %1 in initializer list">, + ext_init_list_type_narrowing.Summary>, InGroup<CXX11NarrowingConstReference>, DefaultError, SFINAEFailure; def ext_init_list_variable_narrowing : ExtWarn< "non-constant-expression cannot be narrowed from type %0 to %1 in " "initializer list">, InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure; def ext_init_list_variable_narrowing_const_reference : ExtWarn< - "non-constant-expression cannot be narrowed from type %0 to %1 in " - "initializer list">, InGroup<CXX11NarrowingConstReference>, DefaultError, SFINAEFailure; + ext_init_list_variable_narrowing.Summary>, InGroup<CXX11NarrowingConstReference>, DefaultError, SFINAEFailure; def ext_init_list_constant_narrowing : ExtWarn< "constant expression evaluates to %0 which cannot be narrowed to type %1">, InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure; def ext_init_list_constant_narrowing_const_reference : ExtWarn< - "constant expression evaluates to %0 which cannot be narrowed to type %1">, + ext_init_list_constant_narrowing.Summary>, InGroup<CXX11NarrowingConstReference>, DefaultError, SFINAEFailure; def warn_init_list_type_narrowing : Warning< "type %0 cannot be narrowed to %1 in initializer list in C++11">, @@ -12064,7 +12062,7 @@ def warn_unsafe_buffer_variable : Warning< InGroup<UnsafeBufferUsage>, DefaultIgnore; def warn_unsafe_buffer_operation : Warning< "%select{unsafe pointer operation|unsafe pointer arithmetic|" - "unsafe buffer access|function introduces unsafe buffer manipulation}0">, + "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">, InGroup<UnsafeBufferUsage>, DefaultIgnore; def note_unsafe_buffer_operation : Note< "used%select{| in pointer arithmetic| in buffer access}0 here">; diff --git a/clang/include/clang/Basic/ObjCRuntime.h b/clang/include/clang/Basic/ObjCRuntime.h index 500b2462f007..f05debe6fea5 100644 --- a/clang/include/clang/Basic/ObjCRuntime.h +++ b/clang/include/clang/Basic/ObjCRuntime.h @@ -100,16 +100,24 @@ public: bool isLegacyDispatchDefaultForArch(llvm::Triple::ArchType Arch) { // The GNUstep runtime uses a newer dispatch method by default from // version 1.6 onwards - if (getKind() == GNUstep && getVersion() >= VersionTuple(1, 6)) { - if (Arch == llvm::Triple::arm || - Arch == 
llvm::Triple::x86 || - Arch == llvm::Triple::x86_64) - return false; - } - else if ((getKind() == MacOSX) && isNonFragile() && - (getVersion() >= VersionTuple(10, 0)) && - (getVersion() < VersionTuple(10, 6))) - return Arch != llvm::Triple::x86_64; + if (getKind() == GNUstep) { + switch (Arch) { + case llvm::Triple::arm: + case llvm::Triple::x86: + case llvm::Triple::x86_64: + return !(getVersion() >= VersionTuple(1, 6)); + case llvm::Triple::aarch64: + case llvm::Triple::mips64: + return !(getVersion() >= VersionTuple(1, 9)); + case llvm::Triple::riscv64: + return !(getVersion() >= VersionTuple(2, 2)); + default: + return true; + } + } else if ((getKind() == MacOSX) && isNonFragile() && + (getVersion() >= VersionTuple(10, 0)) && + (getVersion() < VersionTuple(10, 6))) + return Arch != llvm::Triple::x86_64; // Except for deployment target of 10.5 or less, // Mac runtimes use legacy dispatch everywhere now. return true; diff --git a/clang/include/clang/Basic/OpenACCKinds.h b/clang/include/clang/Basic/OpenACCKinds.h index 3117d584d347..b0c157e00236 100644 --- a/clang/include/clang/Basic/OpenACCKinds.h +++ b/clang/include/clang/Basic/OpenACCKinds.h @@ -72,25 +72,42 @@ enum class OpenACCAtomicKind { /// Represents the kind of an OpenACC clause. enum class OpenACCClauseKind { - // 'finalize' clause, allowed on 'exit data' directive. + /// 'finalize' clause, allowed on 'exit data' directive. Finalize, - // 'if_present' clause, allowed on 'host_data' and 'update' directives. + /// 'if_present' clause, allowed on 'host_data' and 'update' directives. IfPresent, - // 'seq' clause, allowed on 'loop' and 'routine' directives. + /// 'seq' clause, allowed on 'loop' and 'routine' directives. Seq, - // 'independent' clause, allowed on 'loop' directives. + /// 'independent' clause, allowed on 'loop' directives. Independent, - // 'auto' clause, allowed on 'loop' directives. + /// 'auto' clause, allowed on 'loop' directives. Auto, - // 'worker' clause, allowed on 'loop' and 'routine' directives. + /// 'worker' clause, allowed on 'loop' and 'routine' directives. Worker, - // 'vector' clause, allowed on 'loop' and 'routine' directives. Takes no - // arguments for 'routine', so the 'loop' version is not yet implemented - // completely. + /// 'vector' clause, allowed on 'loop' and 'routine' directives. Takes no + /// arguments for 'routine', so the 'loop' version is not yet implemented + /// completely. Vector, - // 'nohost' clause, allowed on 'routine' directives. + /// 'nohost' clause, allowed on 'routine' directives. NoHost, - // Represents an invalid clause, for the purposes of parsing. + /// 'default' clause, allowed on parallel, serial, kernel (and compound) + /// constructs. + Default, + /// 'if' clause, allowed on all the Compute Constructs, Data Constructs, + /// Executable Constructs, and Combined Constructs. + If, + /// 'self' clause, allowed on Compute and Combined Constructs, plus 'update'. + Self, + /// Represents an invalid clause, for the purposes of parsing. + Invalid, +}; + +enum class OpenACCDefaultClauseKind { + /// 'none' option. + None, + /// 'present' option. + Present, + /// Not a valid option. 
Invalid, }; } // namespace clang diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 91f62c4c7633..7f80fb0386cc 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1976,39 +1976,37 @@ def SVFMINQV: SInst<"svminqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fminq } let TargetGuard = "sve2p1|sme2" in { -//FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available -def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; -def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_3>]>; +def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_1>]>; -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilele_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilele_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}nni", "QcQsQiQl", 
MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; } multiclass MultiVecLoad<string i> { - // FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming) - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, 
"aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; } let TargetGuard = "sve2p1|sme2" in { @@ -2017,24 +2015,23 @@ let TargetGuard = "sve2p1|sme2" in { } multiclass MultiVecStore<string i> { - // FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming) - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # 
"_pn_x4">; - def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # 
NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; } let TargetGuard = "sve2p1|sme2" in { @@ -2051,21 +2048,20 @@ def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; } -let TargetGuard = "sve2p1|sme" in { -def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; -def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; +let TargetGuard = "sve2p1|sme2" in { +def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [IsStreamingOrSVE2p1], []>; +def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [IsStreamingOrSVE2p1], []>; defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">; } let TargetGuard = "sve2p1|sme2" in { - //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available - def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>; + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], []>; - def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; + def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingOrSVE2p1]>; - def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [IsStreamingCompatible], []>; - def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; + def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [IsStreamingOrSVE2p1], []>; + def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } let TargetGuard = "(sve2|sme2),b16b16" in { @@ -2326,10 +2322,9 @@ let TargetGuard = "sme2" in { let TargetGuard = "sve2p1|sme2" in { // == BFloat16 multiply-subtract == -// FIXME: Make all of these IsStreamingOrSVE2p1 once that is added - def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, IsStreamingCompatible], []>; - def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, IsStreamingCompatible], []>; + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, IsStreamingOrSVE2p1], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, IsStreamingOrSVE2p1], []>; - def 
SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>; - def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<3, ImmCheck0_7>]>; } diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index 0dba8493bad2..ad29864440c9 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -227,6 +227,7 @@ def IsPreservesZA : FlagType<0x10000000000>; def IsReadZA : FlagType<0x20000000000>; def IsWriteZA : FlagType<0x40000000000>; def IsReductionQV : FlagType<0x80000000000>; +def IsStreamingOrSVE2p1 : FlagType<0x80000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1. // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType<int val> { diff --git a/clang/include/clang/Basic/riscv_sifive_vector.td b/clang/include/clang/Basic/riscv_sifive_vector.td index 0d471f6c554c..ef5114d6105e 100644 --- a/clang/include/clang/Basic/riscv_sifive_vector.td +++ b/clang/include/clang/Basic/riscv_sifive_vector.td @@ -121,11 +121,11 @@ multiclass RVVVQMACCDODBuiltinSet<list<list<string>> suffixes_prototypes> { } multiclass RVVVQMACCQOQBuiltinSet<list<list<string>> suffixes_prototypes> { - let OverloadedName = NAME, - Name = NAME, - HasMasked = false, - Log2LMUL = [-1, 0, 1, 2] in - defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "s", suffixes_prototypes>; + let OverloadedName = NAME, + Name = NAME, + HasMasked = false, + Log2LMUL = [-1, 0, 1, 2] in + defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "s", suffixes_prototypes>; } multiclass RVVVFNRCLIPBuiltinSet<string suffix, string prototype, string type_range> { diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2b93ddf03349..84648c6d5500 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -773,6 +773,8 @@ def gcc_install_dir_EQ : Joined<["--"], "gcc-install-dir=">, def gcc_toolchain : Joined<["--"], "gcc-toolchain=">, Flags<[NoXarchOption]>, HelpText<"Specify a directory where Clang can find 'include' and 'lib{,32,64}/gcc{,-cross}/$triple/$version'. 
" "Clang will use the GCC installation with the largest version">; +def gcc_triple_EQ : Joined<["--"], "gcc-triple=">, + HelpText<"Search for the GCC installation with the specified triple.">; def CC : Flag<["-"], "CC">, Visibility<[ClangOption, CC1Option]>, Group<Preprocessor_Group>, HelpText<"Include comments from within macros in preprocessed output">, @@ -1695,6 +1697,12 @@ defm coverage_mapping : BoolFOption<"coverage-mapping", "Generate coverage mapping to enable code coverage analysis">, NegFlag<SetFalse, [], [ClangOption], "Disable code coverage analysis">, BothFlags< [], [ClangOption, CLOption]>>; +defm mcdc_coverage : BoolFOption<"coverage-mcdc", + CodeGenOpts<"MCDCCoverage">, DefaultFalse, + PosFlag<SetTrue, [], [ClangOption, CC1Option], + "Enable MC/DC criteria when generating code coverage">, + NegFlag<SetFalse, [], [ClangOption], "Disable MC/DC coverage criteria">, + BothFlags<[], [ClangOption, CLOption]>>; def fprofile_generate : Flag<["-"], "fprofile-generate">, Group<f_Group>, Visibility<[ClangOption, CLOption]>, HelpText<"Generate instrumented code to collect execution counts into default.profraw (overridden by LLVM_PROFILE_FILE env var)">; @@ -5192,9 +5200,9 @@ def nogpulib : Flag<["-"], "nogpulib">, MarshallingInfoFlag<LangOpts<"NoGPULib"> Visibility<[ClangOption, CC1Option]>, HelpText<"Do not link device library for CUDA/HIP device compilation">; def : Flag<["-"], "nocudalib">, Alias<nogpulib>; -def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option]>, +def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Link the LLVM C Library for GPUs">; -def nogpulibc : Flag<["-"], "nogpulibc">, Visibility<[ClangOption, CC1Option]>; +def nogpulibc : Flag<["-"], "nogpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def nodefaultlibs : Flag<["-"], "nodefaultlibs">; def nodriverkitlib : Flag<["-"], "nodriverkitlib">; def nofixprebinding : Flag<["-"], "nofixprebinding">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5e3b57ea3322..edaee4c4b66d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10263,11 +10263,13 @@ public: ~ConstraintEvalRAII() { TI.setEvaluateConstraints(OldValue); } }; - // Unlike the above, this evaluates constraints, which should only happen at - // 'constraint checking' time. + // Must be used instead of SubstExpr at 'constraint checking' time. ExprResult SubstConstraintExpr(Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs); + // Unlike the above, this does not evaluates constraints. + ExprResult SubstConstraintExprWithoutSatisfaction( + Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs); /// Substitute the given template arguments into a list of /// expressions, expanding pack expansions if required. @@ -11344,9 +11346,12 @@ private: /// rigorous semantic checking in the new mapped directives. 
bool mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind, ArrayRef<OMPClause *> Clauses, - OpenMPBindClauseKind BindKind, + OpenMPBindClauseKind &BindKind, OpenMPDirectiveKind &Kind, - OpenMPDirectiveKind &PrevMappedDirective); + OpenMPDirectiveKind &PrevMappedDirective, + SourceLocation StartLoc, SourceLocation EndLoc, + const DeclarationNameInfo &DirName, + OpenMPDirectiveKind CancelRegion); public: /// The declarator \p D defines a function in the scope \p S which is nested @@ -12967,7 +12972,7 @@ public: QualType FindCompositeObjCPointerType(ExprResult &LHS, ExprResult &RHS, SourceLocation QuestionLoc); - bool DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr, + bool DiagnoseConditionalForNull(const Expr *LHSExpr, const Expr *RHSExpr, SourceLocation QuestionLoc); void DiagnoseAlwaysNonNullPointer(Expr *E, diff --git a/clang/include/clang/Sema/Template.h b/clang/include/clang/Sema/Template.h index 2a553054a0ce..ce44aca797b0 100644 --- a/clang/include/clang/Sema/Template.h +++ b/clang/include/clang/Sema/Template.h @@ -564,6 +564,7 @@ enum class TemplateSubstitutionKind : char { const MultiLevelTemplateArgumentList &TemplateArgs; Sema::LateInstantiatedAttrVec* LateAttrs = nullptr; LocalInstantiationScope *StartingScope = nullptr; + // Whether to evaluate the C++20 constraints or simply substitute into them. bool EvaluateConstraints = true; /// A list of out-of-line class template partial diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 0395b3e47ab6..b60dcfaabfd1 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2748,21 +2748,20 @@ bool ASTContext::hasUniqueObjectRepresentations( QualType Ty, bool CheckIfTriviallyCopyable) const { // C++17 [meta.unary.prop]: // The predicate condition for a template specialization - // has_unique_object_representations<T> shall be - // satisfied if and only if: + // has_unique_object_representations<T> shall be satisfied if and only if: // (9.1) - T is trivially copyable, and // (9.2) - any two objects of type T with the same value have the same - // object representation, where two objects - // of array or non-union class type are considered to have the same value - // if their respective sequences of - // direct subobjects have the same values, and two objects of union type - // are considered to have the same - // value if they have the same active member and the corresponding members - // have the same value. + // object representation, where: + // - two objects of array or non-union class type are considered to have + // the same value if their respective sequences of direct subobjects + // have the same values, and + // - two objects of union type are considered to have the same value if + // they have the same active member and the corresponding members have + // the same value. // The set of scalar types for which this condition holds is - // implementation-defined. [ Note: If a type has padding - // bits, the condition does not hold; otherwise, the condition holds true - // for unsigned integral types. -- end note ] + // implementation-defined. [ Note: If a type has padding bits, the condition + // does not hold; otherwise, the condition holds true for unsigned integral + // types. -- end note ] assert(!Ty.isNull() && "Null QualType sent to unique object rep check"); // Arrays are unique only if their element type is unique. 
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index b61180c4f349..5e5570bb42a1 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -2034,23 +2034,25 @@ ASTNodeImporter::ImportDeclContext(DeclContext *FromDC, bool ForceImport) { return ToDCOrErr.takeError(); } - DeclContext *ToDC = *ToDCOrErr; - // Remove all declarations, which may be in wrong order in the - // lexical DeclContext and then add them in the proper order. - for (auto *D : FromDC->decls()) { - if (!MightNeedReordering(D)) - continue; + if (const auto *FromRD = dyn_cast<RecordDecl>(FromDC)) { + DeclContext *ToDC = *ToDCOrErr; + // Remove all declarations, which may be in wrong order in the + // lexical DeclContext and then add them in the proper order. + for (auto *D : FromRD->decls()) { + if (!MightNeedReordering(D)) + continue; - assert(D && "DC contains a null decl"); - if (Decl *ToD = Importer.GetAlreadyImportedOrNull(D)) { - // Remove only the decls which we successfully imported. - assert(ToDC == ToD->getLexicalDeclContext() && ToDC->containsDecl(ToD)); - // Remove the decl from its wrong place in the linked list. - ToDC->removeDecl(ToD); - // Add the decl to the end of the linked list. - // This time it will be at the proper place because the enclosing for - // loop iterates in the original (good) order of the decls. - ToDC->addDeclInternal(ToD); + assert(D && "DC contains a null decl"); + if (Decl *ToD = Importer.GetAlreadyImportedOrNull(D)) { + // Remove only the decls which we successfully imported. + assert(ToDC == ToD->getLexicalDeclContext() && ToDC->containsDecl(ToD)); + // Remove the decl from its wrong place in the linked list. + ToDC->removeDecl(ToD); + // Add the decl to the end of the linked list. + // This time it will be at the proper place because the enclosing for + // loop iterates in the original (good) order of the decls. + ToDC->addDeclInternal(ToD); + } } } @@ -6141,6 +6143,11 @@ ExpectedDecl ASTNodeImporter::VisitClassTemplateSpecializationDecl( InsertPos)) // Add this partial specialization to the class template. ClassTemplate->AddPartialSpecialization(PartSpec2, InsertPos); + if (Expected<ClassTemplatePartialSpecializationDecl *> ToInstOrErr = + import(PartialSpec->getInstantiatedFromMember())) + PartSpec2->setInstantiatedFromMember(*ToInstOrErr); + else + return ToInstOrErr.takeError(); updateLookupTableForTemplateParameters(*ToTPList); } else { // Not a partial specialization. diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index 1f492b051e03..a9e0d1698a91 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1463,8 +1463,9 @@ IsStructurallyEquivalentLambdas(StructuralEquivalenceContext &Context, } /// Determine if context of a class is equivalent. -static bool IsRecordContextStructurallyEquivalent(RecordDecl *D1, - RecordDecl *D2) { +static bool +IsRecordContextStructurallyEquivalent(StructuralEquivalenceContext &Context, + RecordDecl *D1, RecordDecl *D2) { // The context should be completely equal, including anonymous and inline // namespaces. 
// We compare objects as part of full translation units, not subtrees of @@ -1491,6 +1492,12 @@ static bool IsRecordContextStructurallyEquivalent(RecordDecl *D1, return false; } + if (auto *D1Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC1)) { + auto *D2Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC2); + if (!IsStructurallyEquivalent(Context, D1Spec, D2Spec)) + return false; + } + DC1 = DC1->getParent()->getNonTransparentContext(); DC2 = DC2->getParent()->getNonTransparentContext(); } @@ -1544,7 +1551,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, // If the records occur in different context (namespace), these should be // different. This is specially important if the definition of one or both // records is missing. - if (!IsRecordContextStructurallyEquivalent(D1, D2)) + if (!IsRecordContextStructurallyEquivalent(Context, D1, D2)) return false; // If both declarations are class template specializations, we know diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index 097753fd3267..584b58473294 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -603,6 +603,8 @@ ExprDependence clang::computeDependence(PredefinedExpr *E) { ExprDependence clang::computeDependence(CallExpr *E, llvm::ArrayRef<Expr *> PreArgs) { auto D = E->getCallee()->getDependence(); + if (E->getType()->isDependentType()) + D |= ExprDependence::Type; for (auto *A : llvm::ArrayRef(E->getArgs(), E->getNumArgs())) { if (A) D |= A->getDependence(); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 12e0a6faa4c3..e1440e5183a4 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2835,7 +2835,7 @@ CharUnits VarDecl::getFlexibleArrayInitChars(const ASTContext &Ctx) const { if (!Ty || !Ty->getDecl()->hasFlexibleArrayMember()) return CharUnits::Zero(); auto *List = dyn_cast<InitListExpr>(getInit()->IgnoreParens()); - if (!List) + if (!List || List->getNumInits() == 0) return CharUnits::Zero(); const Expr *FlexibleInit = List->getInit(List->getNumInits() - 1); auto InitTy = Ctx.getAsConstantArrayType(FlexibleInit->getType()); diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 5e03f0223d31..b1733c2d052a 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -930,20 +930,14 @@ const AttrVec &Decl::getAttrs() const { Decl *Decl::castFromDeclContext (const DeclContext *D) { Decl::Kind DK = D->getDeclKind(); - switch(DK) { -#define DECL(NAME, BASE) -#define DECL_CONTEXT(NAME) \ - case Decl::NAME: \ - return static_cast<NAME##Decl *>(const_cast<DeclContext *>(D)); -#define DECL_CONTEXT_BASE(NAME) -#include "clang/AST/DeclNodes.inc" - default: + switch (DK) { #define DECL(NAME, BASE) -#define DECL_CONTEXT_BASE(NAME) \ - if (DK >= first##NAME && DK <= last##NAME) \ - return static_cast<NAME##Decl *>(const_cast<DeclContext *>(D)); +#define DECL_CONTEXT(NAME) \ + case Decl::NAME: \ + return static_cast<NAME##Decl *>(const_cast<DeclContext *>(D)); #include "clang/AST/DeclNodes.inc" - llvm_unreachable("a decl that inherits DeclContext isn't handled"); + default: + llvm_unreachable("a decl that inherits DeclContext isn't handled"); } } @@ -951,18 +945,12 @@ DeclContext *Decl::castToDeclContext(const Decl *D) { Decl::Kind DK = D->getKind(); switch(DK) { #define DECL(NAME, BASE) -#define DECL_CONTEXT(NAME) \ - case Decl::NAME: \ - return static_cast<NAME##Decl *>(const_cast<Decl *>(D)); -#define DECL_CONTEXT_BASE(NAME) +#define DECL_CONTEXT(NAME) \ + case 
Decl::NAME: \ + return static_cast<NAME##Decl *>(const_cast<Decl *>(D)); #include "clang/AST/DeclNodes.inc" - default: -#define DECL(NAME, BASE) -#define DECL_CONTEXT_BASE(NAME) \ - if (DK >= first##NAME && DK <= last##NAME) \ - return static_cast<NAME##Decl *>(const_cast<Decl *>(D)); -#include "clang/AST/DeclNodes.inc" - llvm_unreachable("a decl that inherits DeclContext isn't handled"); + default: + llvm_unreachable("a decl that inherits DeclContext isn't handled"); } } @@ -1129,20 +1117,14 @@ DeclContext::DeclContext(Decl::Kind K) { } bool DeclContext::classof(const Decl *D) { - switch (D->getKind()) { + Decl::Kind DK = D->getKind(); + switch (DK) { #define DECL(NAME, BASE) #define DECL_CONTEXT(NAME) case Decl::NAME: -#define DECL_CONTEXT_BASE(NAME) #include "clang/AST/DeclNodes.inc" - return true; - default: -#define DECL(NAME, BASE) -#define DECL_CONTEXT_BASE(NAME) \ - if (D->getKind() >= Decl::first##NAME && \ - D->getKind() <= Decl::last##NAME) \ - return true; -#include "clang/AST/DeclNodes.inc" - return false; + return true; + default: + return false; } } diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index e0c9e18cfe3a..c5d14b4af7ff 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -488,7 +488,6 @@ ArgType::matchesType(ASTContext &C, QualType argTy) const { return NoMatchPromotionTypeConfusion; break; case BuiltinType::Half: - case BuiltinType::Float16: case BuiltinType::Float: if (T == C.DoubleTy) return MatchPromotion; diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index a82d1c3c7c62..21ea2503b94b 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -290,10 +290,10 @@ bool CheckInitialized(InterpState &S, CodePtr OpPC, const Pointer &Ptr, } bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { - if (!CheckDummy(S, OpPC, Ptr)) - return false; if (!CheckLive(S, OpPC, Ptr, AK_Read)) return false; + if (!CheckDummy(S, OpPC, Ptr)) + return false; if (!CheckExtern(S, OpPC, Ptr)) return false; if (!CheckRange(S, OpPC, Ptr, AK_Read)) diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 828d4ea35526..c05dea0cc55d 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -1813,9 +1813,6 @@ inline bool ArrayElemPtr(InterpState &S, CodePtr OpPC) { const T &Offset = S.Stk.pop<T>(); const Pointer &Ptr = S.Stk.peek<Pointer>(); - if (!CheckArray(S, OpPC, Ptr)) - return false; - if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr)) return false; @@ -1843,9 +1840,6 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) { const T &Offset = S.Stk.pop<T>(); const Pointer &Ptr = S.Stk.pop<Pointer>(); - if (!CheckArray(S, OpPC, Ptr)) - return false; - if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr)) return false; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index e8274fcd5cfe..369ff66ac4db 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1094,6 +1094,16 @@ void clang::TextNodeDumper::VisitReturnStmt(const ReturnStmt *Node) { } } +void clang::TextNodeDumper::VisitCoawaitExpr(const CoawaitExpr *Node) { + if (Node->isImplicit()) + OS << " implicit"; +} + +void clang::TextNodeDumper::VisitCoreturnStmt(const CoreturnStmt *Node) { + if (Node->isImplicit()) + OS << " implicit"; +} + void TextNodeDumper::VisitConstantExpr(const ConstantExpr *Node) { if (Node->hasAPValueResult()) AddChild("value", diff --git 
a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 160a725939cc..a894d3289eb1 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3414,6 +3414,13 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) { llvm_unreachable("Invalid calling convention."); } +void FunctionProtoType::ExceptionSpecInfo::instantiate() { + assert(Type == EST_Uninstantiated); + NoexceptExpr = + cast<FunctionProtoType>(SourceTemplate->getType())->getNoexceptExpr(); + Type = EST_DependentNoexcept; +} + FunctionProtoType::FunctionProtoType(QualType result, ArrayRef<QualType> params, QualType canonical, const ExtProtoInfo &epi) diff --git a/clang/lib/Analysis/PathDiagnostic.cpp b/clang/lib/Analysis/PathDiagnostic.cpp index 0cb03943c547..79f337a91ec8 100644 --- a/clang/lib/Analysis/PathDiagnostic.cpp +++ b/clang/lib/Analysis/PathDiagnostic.cpp @@ -50,12 +50,7 @@ using namespace clang; using namespace ento; -static StringRef StripTrailingDots(StringRef s) { - for (StringRef::size_type i = s.size(); i != 0; --i) - if (s[i - 1] != '.') - return s.substr(0, i); - return {}; -} +static StringRef StripTrailingDots(StringRef s) { return s.rtrim('.'); } PathDiagnosticPiece::PathDiagnosticPiece(StringRef s, Kind k, DisplayHint hint) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 70eec1cee57f..724c4304a072 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -721,6 +721,34 @@ public: DeclUseList getClaimedVarUseSites() const override { return {}; } }; +// Warning gadget for unsafe invocation of span::data method. +// Triggers when the pointer returned by the invocation is immediately +// cast to a larger type. + +class DataInvocationGadget : public WarningGadget { + constexpr static const char *const OpTag = "data_invocation_expr"; + const ExplicitCastExpr *Op; + +public: + DataInvocationGadget(const MatchFinder::MatchResult &Result) + : WarningGadget(Kind::DataInvocation), + Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::DataInvocation; + } + + static Matcher matcher() { + return stmt( + explicitCastExpr(has(cxxMemberCallExpr(callee(cxxMethodDecl( + hasName("data"), ofClass(hasName("std::span"))))))) + .bind(OpTag)); + } + const Stmt *getBaseStmt() const override { return Op; } + + DeclUseList getClaimedVarUseSites() const override { return {}; } +}; + // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue // Context (see `isInUnspecifiedLvalueContext`). // Note here `[]` is the built-in subscript operator. @@ -2657,8 +2685,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, // every problematic operation and consider it done. No need to deal // with fixable gadgets, no need to group operations by variable. 
for (const auto &G : WarningGadgets) { - Handler.handleUnsafeOperation(G->getBaseStmt(), - /*IsRelatedToDecl=*/false); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, + D->getASTContext()); } // This return guarantees that most of the machine doesn't run when @@ -2893,7 +2921,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, Tracker, Handler, VarGrpMgr); for (const auto &G : UnsafeOps.noVar) { - Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, + D->getASTContext()); } for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { @@ -2904,7 +2933,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D, : FixItList{}, D); for (const auto &G : WarningGadgets) { - Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true); + Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true, + D->getASTContext()); } } } diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 01f9e844da12..a72bd42bad41 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/TargetParser/ARMTargetParser.h" using namespace clang; using namespace clang::targets; @@ -837,6 +838,69 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, if (Opts.RWPI) Builder.defineMacro("__ARM_RWPI", "1"); + // Macros for enabling co-proc intrinsics + uint64_t FeatureCoprocBF = 0; + switch (ArchKind) { + default: + break; + case llvm::ARM::ArchKind::ARMV4: + case llvm::ARM::ArchKind::ARMV4T: + // Filter __arm_ldcl and __arm_stcl in acle.h + FeatureCoprocBF = isThumb() ? 0 : FEATURE_COPROC_B1; + break; + case llvm::ARM::ArchKind::ARMV5T: + FeatureCoprocBF = isThumb() ? 
0 : FEATURE_COPROC_B1 | FEATURE_COPROC_B2; + break; + case llvm::ARM::ArchKind::ARMV5TE: + case llvm::ARM::ArchKind::ARMV5TEJ: + if (!isThumb()) + FeatureCoprocBF = + FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3; + break; + case llvm::ARM::ArchKind::ARMV6: + case llvm::ARM::ArchKind::ARMV6K: + case llvm::ARM::ArchKind::ARMV6KZ: + case llvm::ARM::ArchKind::ARMV6T2: + if (!isThumb() || ArchKind == llvm::ARM::ArchKind::ARMV6T2) + FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | + FEATURE_COPROC_B3 | FEATURE_COPROC_B4; + break; + case llvm::ARM::ArchKind::ARMV7A: + case llvm::ARM::ArchKind::ARMV7R: + case llvm::ARM::ArchKind::ARMV7M: + case llvm::ARM::ArchKind::ARMV7S: + case llvm::ARM::ArchKind::ARMV7EM: + FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | + FEATURE_COPROC_B3 | FEATURE_COPROC_B4; + break; + case llvm::ARM::ArchKind::ARMV8A: + case llvm::ARM::ArchKind::ARMV8R: + case llvm::ARM::ArchKind::ARMV8_1A: + case llvm::ARM::ArchKind::ARMV8_2A: + case llvm::ARM::ArchKind::ARMV8_3A: + case llvm::ARM::ArchKind::ARMV8_4A: + case llvm::ARM::ArchKind::ARMV8_5A: + case llvm::ARM::ArchKind::ARMV8_6A: + case llvm::ARM::ArchKind::ARMV8_7A: + case llvm::ARM::ArchKind::ARMV8_8A: + case llvm::ARM::ArchKind::ARMV8_9A: + case llvm::ARM::ArchKind::ARMV9A: + case llvm::ARM::ArchKind::ARMV9_1A: + case llvm::ARM::ArchKind::ARMV9_2A: + case llvm::ARM::ArchKind::ARMV9_3A: + case llvm::ARM::ArchKind::ARMV9_4A: + // Filter __arm_cdp, __arm_ldcl, __arm_stcl in arm_acle.h + FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B3; + break; + case llvm::ARM::ArchKind::ARMV8MMainline: + case llvm::ARM::ArchKind::ARMV8_1MMainline: + FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | + FEATURE_COPROC_B3 | FEATURE_COPROC_B4; + break; + } + Builder.defineMacro("__ARM_FEATURE_COPROC", + "0x" + Twine::utohexstr(FeatureCoprocBF)); + if (ArchKind == llvm::ARM::ArchKind::XSCALE) Builder.defineMacro("__XSCALE__"); diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index b1aa2794c7e4..9802eb01abf3 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -100,6 +100,19 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { }; uint32_t HW_FP; + enum { + /// __arm_cdp __arm_ldc, __arm_ldcl, __arm_stc, + /// __arm_stcl, __arm_mcr and __arm_mrc + FEATURE_COPROC_B1 = (1 << 0), + /// __arm_cdp2, __arm_ldc2, __arm_stc2, __arm_ldc2l, + /// __arm_stc2l, __arm_mcr2 and __arm_mrc2 + FEATURE_COPROC_B2 = (1 << 1), + /// __arm_mcrr, __arm_mrrc + FEATURE_COPROC_B3 = (1 << 2), + /// __arm_mcrr2, __arm_mrrc2 + FEATURE_COPROC_B4 = (1 << 3), + }; + void setABIAAPCS(); void setABIAPCS(bool IsAAPCS16); diff --git a/clang/lib/Basic/Targets/AVR.h b/clang/lib/Basic/Targets/AVR.h index 854a51d78c39..9376c46cd98c 100644 --- a/clang/lib/Basic/Targets/AVR.h +++ b/clang/lib/Basic/Targets/AVR.h @@ -146,7 +146,9 @@ public: case 'R': // Integer constant (Range: -6 to 5) Info.setRequiresImmediate(-6, 5); return true; - case 'G': // Floating point constant + case 'G': // Floating point constant 0.0 + Info.setRequiresImmediate(0); + return true; case 'Q': // A memory address based on Y or Z pointer with displacement. 
return true; } diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 6bc57a83a2d5..daaa8639ae83 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -237,22 +237,15 @@ ArrayRef<Builtin::Info> RISCVTargetInfo::getTargetBuiltins() const { static std::vector<std::string> collectNonISAExtFeature(ArrayRef<std::string> FeaturesNeedOverride, int XLen) { - auto ParseResult = - llvm::RISCVISAInfo::parseFeatures(XLen, FeaturesNeedOverride); - - if (!ParseResult) { - consumeError(ParseResult.takeError()); - return std::vector<std::string>(); - } - - std::vector<std::string> ImpliedFeatures = (*ParseResult)->toFeatureVector(); - std::vector<std::string> NonISAExtFeatureVec; + auto IsNonISAExtFeature = [](const std::string &Feature) { + assert(Feature.size() > 1 && (Feature[0] == '+' || Feature[0] == '-')); + StringRef Ext = StringRef(Feature).drop_front(); // drop the +/- + return !llvm::RISCVISAInfo::isSupportedExtensionFeature(Ext); + }; llvm::copy_if(FeaturesNeedOverride, std::back_inserter(NonISAExtFeatureVec), - [&](const std::string &Feat) { - return !llvm::is_contained(ImpliedFeatures, Feat); - }); + IsNonISAExtFeature); return NonISAExtFeatureVec; } @@ -303,7 +296,7 @@ bool RISCVTargetInfo::initFeatureMap( } // RISCVISAInfo makes implications for ISA features - std::vector<std::string> ImpliedFeatures = (*ParseResult)->toFeatureVector(); + std::vector<std::string> ImpliedFeatures = (*ParseResult)->toFeatures(); // parseFeatures normalizes the feature set by dropping any explicit // negatives, and non-extension features. We need to preserve the later @@ -420,7 +413,7 @@ static void handleFullArchString(StringRef FullArchStr, // Forward the invalid FullArchStr. Features.push_back("+" + FullArchStr.str()); } else { - std::vector<std::string> FeatStrings = (*RII)->toFeatureVector(); + std::vector<std::string> FeatStrings = (*RII)->toFeatures(); Features.insert(Features.end(), FeatStrings.begin(), FeatStrings.end()); } } diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 3deaa19f8d4f..64e281b888a9 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -295,11 +295,13 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX512BF16 = true; } else if (Feature == "+avx512er") { HasAVX512ER = true; + Diags.Report(diag::warn_knl_knm_isa_support_removed); } else if (Feature == "+avx512fp16") { HasAVX512FP16 = true; HasLegalHalfType = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; + Diags.Report(diag::warn_knl_knm_isa_support_removed); } else if (Feature == "+avx512dq") { HasAVX512DQ = true; } else if (Feature == "+avx512bitalg") { @@ -358,6 +360,7 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasPREFETCHI = true; } else if (Feature == "+prefetchwt1") { HasPREFETCHWT1 = true; + Diags.Report(diag::warn_knl_knm_isa_support_removed); } else if (Feature == "+clzero") { HasCLZERO = true; } else if (Feature == "+cldemote") { diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index d18f186ce5b4..34319381901a 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -856,6 +856,7 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { EnterCXXTryStmt(*cast<CXXTryStmt>(Body), true); incrementProfileCounter(Body); + maybeCreateMCDCCondBitmap(); RunCleanupsScope RunCleanups(*this); @@ -1444,8 +1445,10 @@ void 
CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { } Stmt *Body = Dtor->getBody(); - if (Body) + if (Body) { incrementProfileCounter(Body); + maybeCreateMCDCCondBitmap(); + } // The call to operator delete in a deleting destructor happens // outside of the function-try-block, which means it's always @@ -1548,6 +1551,7 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) LexicalScope Scope(*this, RootCS->getSourceRange()); incrementProfileCounter(RootCS); + maybeCreateMCDCCondBitmap(); AssignmentMemcpyizer AM(*this, AssignOp, Args); for (auto *I : RootCS->body()) AM.emitAssignment(I); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 6adf99531e30..9ec185153d12 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -4564,6 +4564,12 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { if (LHSCondVal) { // If we have 1 && X, just emit X. CGF.incrementProfileCounter(E); + // If the top of the logical operator nest, reset the MCDC temp to 0. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeResetMCDCCondBitmap(E); + + CGF.MCDCLogOpStack.push_back(E); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); // If we're generating for profiling or coverage, generate a branch to a @@ -4572,6 +4578,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { // "FalseBlock" after the increment is done. if (InstrumentRegions && CodeGenFunction::isInstrumentedCondition(E->getRHS())) { + CGF.maybeUpdateMCDCCondBitmap(E->getRHS(), RHSCond); llvm::BasicBlock *FBlock = CGF.createBasicBlock("land.end"); llvm::BasicBlock *RHSBlockCnt = CGF.createBasicBlock("land.rhscnt"); Builder.CreateCondBr(RHSCond, RHSBlockCnt, FBlock); @@ -4581,6 +4588,11 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { CGF.EmitBlock(FBlock); } + CGF.MCDCLogOpStack.pop_back(); + // If the top of the logical operator nest, update the MCDC bitmap. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeUpdateMCDCTestVectorBitmap(E); + // ZExt result to int or bool. return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext"); } @@ -4590,6 +4602,12 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { return llvm::Constant::getNullValue(ResTy); } + // If the top of the logical operator nest, reset the MCDC temp to 0. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeResetMCDCCondBitmap(E); + + CGF.MCDCLogOpStack.push_back(E); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("land.end"); llvm::BasicBlock *RHSBlock = CGF.createBasicBlock("land.rhs"); @@ -4622,6 +4640,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { // condition coverage. if (InstrumentRegions && CodeGenFunction::isInstrumentedCondition(E->getRHS())) { + CGF.maybeUpdateMCDCCondBitmap(E->getRHS(), RHSCond); llvm::BasicBlock *RHSBlockCnt = CGF.createBasicBlock("land.rhscnt"); Builder.CreateCondBr(RHSCond, RHSBlockCnt, ContBlock); CGF.EmitBlock(RHSBlockCnt); @@ -4639,6 +4658,11 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { // Insert an entry into the phi node for the edge with the value of RHSCond. PN->addIncoming(RHSCond, RHSBlock); + CGF.MCDCLogOpStack.pop_back(); + // If the top of the logical operator nest, update the MCDC bitmap. 
+ if (CGF.MCDCLogOpStack.empty()) + CGF.maybeUpdateMCDCTestVectorBitmap(E); + // Artificial location to preserve the scope information { auto NL = ApplyDebugLocation::CreateArtificial(CGF); @@ -4680,6 +4704,12 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { if (!LHSCondVal) { // If we have 0 || X, just emit X. CGF.incrementProfileCounter(E); + // If the top of the logical operator nest, reset the MCDC temp to 0. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeResetMCDCCondBitmap(E); + + CGF.MCDCLogOpStack.push_back(E); + Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS()); // If we're generating for profiling or coverage, generate a branch to a @@ -4688,6 +4718,7 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { // "FalseBlock" after the increment is done. if (InstrumentRegions && CodeGenFunction::isInstrumentedCondition(E->getRHS())) { + CGF.maybeUpdateMCDCCondBitmap(E->getRHS(), RHSCond); llvm::BasicBlock *FBlock = CGF.createBasicBlock("lor.end"); llvm::BasicBlock *RHSBlockCnt = CGF.createBasicBlock("lor.rhscnt"); Builder.CreateCondBr(RHSCond, FBlock, RHSBlockCnt); @@ -4697,6 +4728,11 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { CGF.EmitBlock(FBlock); } + CGF.MCDCLogOpStack.pop_back(); + // If the top of the logical operator nest, update the MCDC bitmap. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeUpdateMCDCTestVectorBitmap(E); + // ZExt result to int or bool. return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext"); } @@ -4706,6 +4742,12 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { return llvm::ConstantInt::get(ResTy, 1); } + // If the top of the logical operator nest, reset the MCDC temp to 0. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeResetMCDCCondBitmap(E); + + CGF.MCDCLogOpStack.push_back(E); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("lor.end"); llvm::BasicBlock *RHSBlock = CGF.createBasicBlock("lor.rhs"); @@ -4742,6 +4784,7 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { // condition coverage. if (InstrumentRegions && CodeGenFunction::isInstrumentedCondition(E->getRHS())) { + CGF.maybeUpdateMCDCCondBitmap(E->getRHS(), RHSCond); llvm::BasicBlock *RHSBlockCnt = CGF.createBasicBlock("lor.rhscnt"); Builder.CreateCondBr(RHSCond, ContBlock, RHSBlockCnt); CGF.EmitBlock(RHSBlockCnt); @@ -4755,6 +4798,11 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { CGF.EmitBlock(ContBlock); PN->addIncoming(RHSCond, RHSBlock); + CGF.MCDCLogOpStack.pop_back(); + // If the top of the logical operator nest, update the MCDC bitmap. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeUpdateMCDCTestVectorBitmap(E); + // ZExt result to int. return Builder.CreateZExtOrBitCast(PN, ResTy, "lor.ext"); } @@ -4899,6 +4947,10 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { return Builder.CreateSelect(CondV, LHS, RHS, "cond"); } + // If the top of the logical operator nest, reset the MCDC temp to 0. + if (CGF.MCDCLogOpStack.empty()) + CGF.maybeResetMCDCCondBitmap(condExpr); + llvm::BasicBlock *LHSBlock = CGF.createBasicBlock("cond.true"); llvm::BasicBlock *RHSBlock = CGF.createBasicBlock("cond.false"); llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end"); @@ -4934,6 +4986,11 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::PHINode *PN = Builder.CreatePHI(LHS->getType(), 2, "cond"); PN->addIncoming(LHS, LHSBlock); PN->addIncoming(RHS, RHSBlock); + + // If the top of the logical operator nest, update the MCDC bitmap. 
+ if (CGF.MCDCLogOpStack.empty()) + CGF.maybeUpdateMCDCTestVectorBitmap(condExpr); + return PN; } @@ -5292,8 +5349,8 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal, } else { // Otherwise this is array-like indexing. The local offset is the index // multiplied by the element size. - auto *ElementSize = llvm::ConstantInt::get( - IntPtrTy, DL.getTypeAllocSize(GTI.getIndexedType())); + auto *ElementSize = + llvm::ConstantInt::get(IntPtrTy, GTI.getSequentialElementStride(DL)); auto *IndexS = Builder.CreateIntCast(Index, IntPtrTy, /*isSigned=*/true); LocalOffset = eval(BO_Mul, ElementSize, IndexS); } diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 4ca1a8cce64d..9443fecf9b79 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -1851,6 +1851,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::GlobalValue::HiddenVisibility : llvm::GlobalValue::DefaultVisibility; OffsetVar->setVisibility(ivarVisibility); + if (ivarVisibility != llvm::GlobalValue::HiddenVisibility) + CGM.setGVProperties(OffsetVar, OID->getClassInterface()); ivarBuilder.add(OffsetVar); // Ivar size ivarBuilder.addInt(Int32Ty, diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 0f79a2e861d2..b89017de0bcf 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -837,7 +837,19 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { if (!ThenCount && !getCurrentProfileCount() && CGM.getCodeGenOpts().OptimizationLevel) LH = Stmt::getLikelihood(S.getThen(), S.getElse()); - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, ThenCount, LH); + + // When measuring MC/DC, always fully evaluate the condition up front using + // EvaluateExprAsBool() so that the test vector bitmap can be updated prior to + // executing the body of the if.then or if.else. This is useful for when + // there is a 'return' within the body, but this is particularly beneficial + // when one if-stmt is nested within another if-stmt so that all of the MC/DC + // updates are kept linear and consistent. + if (!CGM.getCodeGenOpts().MCDCCoverage) + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, ThenCount, LH); + else { + llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + Builder.CreateCondBr(BoolCondVal, ThenBlock, ElseBlock); + } // Emit the 'then' code. EmitBlock(ThenBlock); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 2199d7b58fb9..2673e4a5cee7 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1256,6 +1256,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { incrementProfileCounter(Body); + maybeCreateMCDCCondBitmap(); if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body)) EmitCompoundStmtWithoutScope(*S); else @@ -1601,6 +1602,13 @@ bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) { bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, bool &ResultBool, bool AllowLabels) { + // If MC/DC is enabled, disable folding so that we can instrument all + // conditions to yield complete test vectors. We still keep track of + // folded conditions during region mapping and visualization. 
+ if (!AllowLabels && CGM.getCodeGenOpts().hasProfileClangInstr() && + CGM.getCodeGenOpts().MCDCCoverage) + return false; + llvm::APSInt ResultInt; if (!ConstantFoldsToSimpleInteger(Cond, ResultInt, AllowLabels)) return false; @@ -1629,16 +1637,20 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, return true; } +/// Strip parentheses and simplistic logical-NOT operators. +const Expr *CodeGenFunction::stripCond(const Expr *C) { + while (const UnaryOperator *Op = dyn_cast<UnaryOperator>(C->IgnoreParens())) { + if (Op->getOpcode() != UO_LNot) + break; + C = Op->getSubExpr(); + } + return C->IgnoreParens(); +} + /// Determine whether the given condition is an instrumentable condition /// (i.e. no "&&" or "||"). bool CodeGenFunction::isInstrumentedCondition(const Expr *C) { - // Bypass simplistic logical-NOT operator before determining whether the - // condition contains any other logical operator. - if (const UnaryOperator *UnOp = dyn_cast<UnaryOperator>(C->IgnoreParens())) - if (UnOp->getOpcode() == UO_LNot) - C = UnOp->getSubExpr(); - - const BinaryOperator *BOp = dyn_cast<BinaryOperator>(C->IgnoreParens()); + const BinaryOperator *BOp = dyn_cast<BinaryOperator>(stripCond(C)); return (!BOp || !BOp->isLogicalOp()); } @@ -1717,17 +1729,19 @@ void CodeGenFunction::EmitBranchToCounterBlock( /// statement) to the specified blocks. Based on the condition, this might try /// to simplify the codegen of the conditional based on the branch. /// \param LH The value of the likelihood attribute on the True branch. -void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, - llvm::BasicBlock *TrueBlock, - llvm::BasicBlock *FalseBlock, - uint64_t TrueCount, - Stmt::Likelihood LH) { +/// \param ConditionalOp Used by MC/DC code coverage to track the result of the +/// ConditionalOperator (ternary) through a recursive call for the operator's +/// LHS and RHS nodes. +void CodeGenFunction::EmitBranchOnBoolExpr( + const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, + uint64_t TrueCount, Stmt::Likelihood LH, const Expr *ConditionalOp) { Cond = Cond->IgnoreParens(); if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) { - // Handle X && Y in a condition. if (CondBOp->getOpcode() == BO_LAnd) { + MCDCLogOpStack.push_back(CondBOp); + // If we have "1 && X", simplify the code. "0 && X" would have constant // folded if the case was simple enough. bool ConstantBool = false; @@ -1735,8 +1749,10 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, ConstantBool) { // br(1 && X) -> br(X). incrementProfileCounter(CondBOp); - return EmitBranchToCounterBlock(CondBOp->getRHS(), BO_LAnd, TrueBlock, - FalseBlock, TrueCount, LH); + EmitBranchToCounterBlock(CondBOp->getRHS(), BO_LAnd, TrueBlock, + FalseBlock, TrueCount, LH); + MCDCLogOpStack.pop_back(); + return; } // If we have "X && 1", simplify the code to use an uncond branch. @@ -1744,8 +1760,10 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) && ConstantBool) { // br(X && 1) -> br(X). - return EmitBranchToCounterBlock(CondBOp->getLHS(), BO_LAnd, TrueBlock, - FalseBlock, TrueCount, LH, CondBOp); + EmitBranchToCounterBlock(CondBOp->getLHS(), BO_LAnd, TrueBlock, + FalseBlock, TrueCount, LH, CondBOp); + MCDCLogOpStack.pop_back(); + return; } // Emit the LHS as a conditional. 
If the LHS conditional is false, we @@ -1774,11 +1792,13 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, EmitBranchToCounterBlock(CondBOp->getRHS(), BO_LAnd, TrueBlock, FalseBlock, TrueCount, LH); eval.end(*this); - + MCDCLogOpStack.pop_back(); return; } if (CondBOp->getOpcode() == BO_LOr) { + MCDCLogOpStack.push_back(CondBOp); + // If we have "0 || X", simplify the code. "1 || X" would have constant // folded if the case was simple enough. bool ConstantBool = false; @@ -1786,8 +1806,10 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, !ConstantBool) { // br(0 || X) -> br(X). incrementProfileCounter(CondBOp); - return EmitBranchToCounterBlock(CondBOp->getRHS(), BO_LOr, TrueBlock, - FalseBlock, TrueCount, LH); + EmitBranchToCounterBlock(CondBOp->getRHS(), BO_LOr, TrueBlock, + FalseBlock, TrueCount, LH); + MCDCLogOpStack.pop_back(); + return; } // If we have "X || 0", simplify the code to use an uncond branch. @@ -1795,10 +1817,11 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) && !ConstantBool) { // br(X || 0) -> br(X). - return EmitBranchToCounterBlock(CondBOp->getLHS(), BO_LOr, TrueBlock, - FalseBlock, TrueCount, LH, CondBOp); + EmitBranchToCounterBlock(CondBOp->getLHS(), BO_LOr, TrueBlock, + FalseBlock, TrueCount, LH, CondBOp); + MCDCLogOpStack.pop_back(); + return; } - // Emit the LHS as a conditional. If the LHS conditional is true, we // want to jump to the TrueBlock. llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false"); @@ -1829,14 +1852,20 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, RHSCount, LH); eval.end(*this); - + MCDCLogOpStack.pop_back(); return; } } if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) { // br(!x, t, f) -> br(x, f, t) - if (CondUOp->getOpcode() == UO_LNot) { + // Avoid doing this optimization when instrumenting a condition for MC/DC. + // LNot is taken as part of the condition for simplicity, and changing its + // sense negatively impacts test vector tracking. + bool MCDCCondition = CGM.getCodeGenOpts().hasProfileClangInstr() && + CGM.getCodeGenOpts().MCDCCoverage && + isInstrumentedCondition(Cond); + if (CondUOp->getOpcode() == UO_LNot && !MCDCCondition) { // Negate the count. uint64_t FalseCount = getCurrentProfileCount() - TrueCount; // The values of the enum are chosen to make this negation possible. @@ -1876,14 +1905,14 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, { ApplyDebugLocation DL(*this, Cond); EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock, - LHSScaledTrueCount, LH); + LHSScaledTrueCount, LH, CondOp); } cond.end(*this); cond.begin(*this); EmitBlock(RHSBlock); EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock, - TrueCount - LHSScaledTrueCount, LH); + TrueCount - LHSScaledTrueCount, LH, CondOp); cond.end(*this); return; @@ -1906,6 +1935,21 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, CondV = EvaluateExprAsBool(Cond); } + // If not at the top of the logical operator nest, update MCDC temp with the + // boolean result of the evaluated condition. + if (!MCDCLogOpStack.empty()) { + const Expr *MCDCBaseExpr = Cond; + // When a nested ConditionalOperator (ternary) is encountered in a boolean + // expression, MC/DC tracks the result of the ternary, and this is tied to + // the ConditionalOperator expression and not the ternary's LHS or RHS. 
If + // this is the case, the ConditionalOperator expression is passed through + // the ConditionalOp parameter and then used as the MCDC base expression. + if (ConditionalOp) + MCDCBaseExpr = ConditionalOp; + + maybeUpdateMCDCCondBitmap(MCDCBaseExpr, CondV); + } + llvm::MDNode *Weights = nullptr; llvm::MDNode *Unpredictable = nullptr; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 751d8110b13d..07c7678df87e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -287,6 +287,9 @@ public: /// nest would extend. SmallVector<llvm::CanonicalLoopInfo *, 4> OMPLoopNestStack; + /// Stack to track the Logical Operator recursion nest for MC/DC. + SmallVector<const BinaryOperator *, 16> MCDCLogOpStack; + /// Number of nested loop to be consumed by the last surrounding /// loop-associated directive. int ExpectedOMPLoopDepth = 0; @@ -1521,6 +1524,9 @@ private: CodeGenPGO PGO; + /// Bitmap used by MC/DC to track condition outcomes of a boolean expression. + Address MCDCCondBitmapAddr = Address::invalid(); + /// Calculate branch weights appropriate for PGO data llvm::MDNode *createProfileWeights(uint64_t TrueCount, uint64_t FalseCount) const; @@ -1539,6 +1545,52 @@ public: PGO.setCurrentStmt(S); } + bool isMCDCCoverageEnabled() const { + return (CGM.getCodeGenOpts().hasProfileClangInstr() && + CGM.getCodeGenOpts().MCDCCoverage && + !CurFn->hasFnAttribute(llvm::Attribute::NoProfile)); + } + + /// Allocate a temp value on the stack that MCDC can use to track condition + /// results. + void maybeCreateMCDCCondBitmap() { + if (isMCDCCoverageEnabled()) { + PGO.emitMCDCParameters(Builder); + MCDCCondBitmapAddr = + CreateIRTemp(getContext().UnsignedIntTy, "mcdc.addr"); + } + } + + bool isBinaryLogicalOp(const Expr *E) const { + const BinaryOperator *BOp = dyn_cast<BinaryOperator>(E->IgnoreParens()); + return (BOp && BOp->isLogicalOp()); + } + + /// Zero-init the MCDC temp value. + void maybeResetMCDCCondBitmap(const Expr *E) { + if (isMCDCCoverageEnabled() && isBinaryLogicalOp(E)) { + PGO.emitMCDCCondBitmapReset(Builder, E, MCDCCondBitmapAddr); + PGO.setCurrentStmt(E); + } + } + + /// Increment the profiler's counter for the given expression by \p StepV. + /// If \p StepV is null, the default increment is 1. + void maybeUpdateMCDCTestVectorBitmap(const Expr *E) { + if (isMCDCCoverageEnabled() && isBinaryLogicalOp(E)) { + PGO.emitMCDCTestVectorBitmapUpdate(Builder, E, MCDCCondBitmapAddr); + PGO.setCurrentStmt(E); + } + } + + /// Update the MCDC temp value with the condition's evaluated result. + void maybeUpdateMCDCCondBitmap(const Expr *E, llvm::Value *Val) { + if (isMCDCCoverageEnabled()) { + PGO.emitMCDCCondBitmapUpdate(Builder, E, MCDCCondBitmapAddr, Val); + PGO.setCurrentStmt(E); + } + } + /// Get the profiler's count for the given statement. uint64_t getProfileCount(const Stmt *S) { return PGO.getStmtCount(S).value_or(0); @@ -4626,6 +4678,9 @@ public: bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result, bool AllowLabels = false); + /// Ignore parentheses and logical-NOT to track conditions consistently. + static const Expr *stripCond(const Expr *C); + /// isInstrumentedCondition - Determine whether the given condition is an /// instrumentable condition (i.e. no "&&" or "||"). static bool isInstrumentedCondition(const Expr *C); @@ -4648,7 +4703,8 @@ public: /// evaluate to true based on PGO data. 
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, - Stmt::Likelihood LH = Stmt::LH_None); + Stmt::Likelihood LH = Stmt::LH_None, + const Expr *ConditionalOp = nullptr); /// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is /// nonnull, if \p LHS is marked _Nonnull. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d78f2594a237..4fd32337cccc 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4869,6 +4869,10 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, isExternallyVisible(D->getLinkageAndVisibility().getLinkage())) GV->setSection(".cp.rodata"); + // Handle code model attribute + if (const auto *CMA = D->getAttr<CodeModelAttr>()) + GV->setCodeModel(CMA->getModel()); + // Check if we a have a const declaration with an initializer, we may be // able to emit it as available_externally to expose it's value to the // optimizer. diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 81bf8ea696b1..d68844d476eb 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -161,13 +161,24 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { PGOHash Hash; /// The map of statements to counters. llvm::DenseMap<const Stmt *, unsigned> &CounterMap; + /// The next bitmap byte index to assign. + unsigned NextMCDCBitmapIdx; + /// The map of statements to MC/DC bitmap coverage objects. + llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap; + /// Maximum number of supported MC/DC conditions in a boolean expression. + unsigned MCDCMaxCond; /// The profile version. uint64_t ProfileVersion; + /// Diagnostics Engine used to report warnings. + DiagnosticsEngine &Diag; MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion, - llvm::DenseMap<const Stmt *, unsigned> &CounterMap) + llvm::DenseMap<const Stmt *, unsigned> &CounterMap, + llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap, + unsigned MCDCMaxCond, DiagnosticsEngine &Diag) : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap), - ProfileVersion(ProfileVersion) {} + NextMCDCBitmapIdx(0), MCDCBitmapMap(MCDCBitmapMap), + MCDCMaxCond(MCDCMaxCond), ProfileVersion(ProfileVersion), Diag(Diag) {} // Blocks and lambdas are handled as separate functions, so we need not // traverse them in the parent context. @@ -207,15 +218,126 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { return Type; } + /// The following stacks are used with dataTraverseStmtPre() and + /// dataTraverseStmtPost() to track the depth of nested logical operators in a + /// boolean expression in a function. The ultimate purpose is to keep track + /// of the number of leaf-level conditions in the boolean expression so that a + /// profile bitmap can be allocated based on that number. + /// + /// The stacks are also used to find error cases and notify the user. A + /// standard logical operator nest for a boolean expression could be in a form + /// similar to this: "x = a && b && c && (d || f)" + unsigned NumCond = 0; + bool SplitNestedLogicalOp = false; + SmallVector<const Stmt *, 16> NonLogOpStack; + SmallVector<const BinaryOperator *, 16> LogOpStack; + + // Hook: dataTraverseStmtPre() is invoked prior to visiting an AST Stmt node. + bool dataTraverseStmtPre(Stmt *S) { + /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. 
+ if (MCDCMaxCond == 0) + return true; + + /// At the top of the logical operator nest, reset the number of conditions. + if (LogOpStack.empty()) + NumCond = 0; + + if (const Expr *E = dyn_cast<Expr>(S)) { + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens()); + if (BinOp && BinOp->isLogicalOp()) { + /// Check for "split-nested" logical operators. This happens when a new + /// boolean expression logical-op nest is encountered within an existing + /// boolean expression, separated by a non-logical operator. For + /// example, in "x = (a && b && c && foo(d && f))", the "d && f" case + /// starts a new boolean expression that is separated from the other + /// conditions by the operator foo(). Split-nested cases are not + /// supported by MC/DC. + SplitNestedLogicalOp = SplitNestedLogicalOp || !NonLogOpStack.empty(); + + LogOpStack.push_back(BinOp); + return true; + } + } + + /// Keep track of non-logical operators. These are OK as long as we don't + /// encounter a new logical operator after seeing one. + if (!LogOpStack.empty()) + NonLogOpStack.push_back(S); + + return true; + } + + // Hook: dataTraverseStmtPost() is invoked by the AST visitor after visiting + // an AST Stmt node. MC/DC will use it to to signal when the top of a + // logical operation (boolean expression) nest is encountered. + bool dataTraverseStmtPost(Stmt *S) { + /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. + if (MCDCMaxCond == 0) + return true; + + if (const Expr *E = dyn_cast<Expr>(S)) { + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens()); + if (BinOp && BinOp->isLogicalOp()) { + assert(LogOpStack.back() == BinOp); + LogOpStack.pop_back(); + + /// At the top of logical operator nest: + if (LogOpStack.empty()) { + /// Was the "split-nested" logical operator case encountered? + if (SplitNestedLogicalOp) { + unsigned DiagID = Diag.getCustomDiagID( + DiagnosticsEngine::Warning, + "unsupported MC/DC boolean expression; " + "contains an operation with a nested boolean expression. " + "Expression will not be covered"); + Diag.Report(S->getBeginLoc(), DiagID); + return false; + } + + /// Was the maximum number of conditions encountered? + if (NumCond > MCDCMaxCond) { + unsigned DiagID = Diag.getCustomDiagID( + DiagnosticsEngine::Warning, + "unsupported MC/DC boolean expression; " + "number of conditions (%0) exceeds max (%1). " + "Expression will not be covered"); + Diag.Report(S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond; + return false; + } + + // Otherwise, allocate the number of bytes required for the bitmap + // based on the number of conditions. Must be at least 1-byte long. + MCDCBitmapMap[BinOp] = NextMCDCBitmapIdx; + unsigned SizeInBits = std::max<unsigned>(1L << NumCond, CHAR_BIT); + NextMCDCBitmapIdx += SizeInBits / CHAR_BIT; + } + return true; + } + } + + if (!LogOpStack.empty()) + NonLogOpStack.pop_back(); + + return true; + } + /// The RHS of all logical operators gets a fresh counter in order to count /// how many times the RHS evaluates to true or false, depending on the /// semantics of the operator. This is only valid for ">= v7" of the profile - /// version so that we facilitate backward compatibility. + /// version so that we facilitate backward compatibility. In addition, in + /// order to use MC/DC, count the number of total LHS and RHS conditions. 
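// A standalone sketch of the bitmap-sizing rule used above (the helper name
// mcdcBitmapBytes is made up for illustration): a decision with NumCond leaf
// conditions needs 2^NumCond bits, one per possible test vector, rounded up
// to a minimum of one byte, so with the 6-condition cap a single decision
// never needs more than 8 bytes.
#include <algorithm>
#include <climits>
#include <cstdio>
#include <initializer_list>

static unsigned mcdcBitmapBytes(unsigned NumCond) {
  unsigned SizeInBits = std::max<unsigned>(1u << NumCond, CHAR_BIT);
  return SizeInBits / CHAR_BIT;
}

int main() {
  for (unsigned N : {1u, 3u, 6u})
    std::printf("%u condition(s) -> %u byte(s)\n", N, mcdcBitmapBytes(N));
  // 1 -> 1 byte, 3 -> 1 byte, 6 -> 8 bytes.
}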
bool VisitBinaryOperator(BinaryOperator *S) { - if (ProfileVersion >= llvm::IndexedInstrProf::Version7) - if (S->isLogicalOp() && - CodeGenFunction::isInstrumentedCondition(S->getRHS())) - CounterMap[S->getRHS()] = NextCounter++; + if (S->isLogicalOp()) { + if (CodeGenFunction::isInstrumentedCondition(S->getLHS())) + NumCond++; + + if (CodeGenFunction::isInstrumentedCondition(S->getRHS())) { + if (ProfileVersion >= llvm::IndexedInstrProf::Version7) + CounterMap[S->getRHS()] = NextCounter++; + + NumCond++; + } + } return Base::VisitBinaryOperator(S); } @@ -851,8 +973,22 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { ProfileVersion = PGOReader->getVersion(); } + // If MC/DC is enabled, set the MaxConditions to a preset value. Otherwise, + // set it to zero. This value impacts the number of conditions accepted in a + // given boolean expression, which impacts the size of the bitmap used to + // track test vector execution for that boolean expression. Because the + // bitmap scales exponentially (2^n) based on the number of conditions seen, + // the maximum value is hard-coded at 6 conditions, which is more than enough + // for most embedded applications. Setting a maximum value prevents the + // bitmap footprint from growing too large without the user's knowledge. In + // the future, this value could be adjusted with a command-line option. + unsigned MCDCMaxConditions = (CGM.getCodeGenOpts().MCDCCoverage) ? 6 : 0; + RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); - MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap); + RegionMCDCBitmapMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); + MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap, + *RegionMCDCBitmapMap, MCDCMaxConditions, + CGM.getDiags()); if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) Walker.TraverseDecl(const_cast<FunctionDecl *>(FD)); else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) @@ -863,6 +999,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { Walker.TraverseDecl(const_cast<CapturedDecl *>(CD)); assert(Walker.NextCounter > 0 && "no entry counter mapped for decl"); NumRegionCounters = Walker.NextCounter; + MCDCBitmapBytes = Walker.NextMCDCBitmapIdx; FunctionHash = Walker.Hash.finalize(); } @@ -894,9 +1031,11 @@ void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { std::string CoverageMapping; llvm::raw_string_ostream OS(CoverageMapping); - CoverageMappingGen MappingGen(*CGM.getCoverageMapping(), - CGM.getContext().getSourceManager(), - CGM.getLangOpts(), RegionCounterMap.get()); + RegionCondIDMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); + CoverageMappingGen MappingGen( + *CGM.getCoverageMapping(), CGM.getContext().getSourceManager(), + CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCBitmapMap.get(), + RegionCondIDMap.get()); MappingGen.emitCounterMapping(D, OS); OS.flush(); @@ -972,6 +1111,108 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, ArrayRef(Args)); } +bool CodeGenPGO::canEmitMCDCCoverage(const CGBuilderTy &Builder) { + return (CGM.getCodeGenOpts().hasProfileClangInstr() && + CGM.getCodeGenOpts().MCDCCoverage && Builder.GetInsertBlock()); +} + +void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) { + if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) + return; + + auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); + + // Emit intrinsic representing MCDC bitmap parameters at function entry. 
+ // This is used by the instrumentation pass, but it isn't actually lowered to + // anything. + llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FunctionHash), + Builder.getInt32(MCDCBitmapBytes)}; + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_parameters), Args); +} + +void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, + const Expr *S, + Address MCDCCondBitmapAddr) { + if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) + return; + + S = S->IgnoreParens(); + + auto ExprMCDCBitmapMapIterator = RegionMCDCBitmapMap->find(S); + if (ExprMCDCBitmapMapIterator == RegionMCDCBitmapMap->end()) + return; + + // Extract the ID of the global bitmap associated with this expression. + unsigned MCDCTestVectorBitmapID = ExprMCDCBitmapMapIterator->second; + auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); + + // Emit intrinsic responsible for updating the global bitmap corresponding to + // a boolean expression. The index being set is based on the value loaded + // from a pointer to a dedicated temporary value on the stack that is itself + // updated via emitMCDCCondBitmapReset() and emitMCDCCondBitmapUpdate(). The + // index represents an executed test vector. + llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FunctionHash), + Builder.getInt32(MCDCBitmapBytes), + Builder.getInt32(MCDCTestVectorBitmapID), + MCDCCondBitmapAddr.getPointer()}; + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_tvbitmap_update), Args); +} + +void CodeGenPGO::emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S, + Address MCDCCondBitmapAddr) { + if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) + return; + + S = S->IgnoreParens(); + + if (RegionMCDCBitmapMap->find(S) == RegionMCDCBitmapMap->end()) + return; + + // Emit intrinsic that resets a dedicated temporary value on the stack to 0. + Builder.CreateStore(Builder.getInt32(0), MCDCCondBitmapAddr); +} + +void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, + Address MCDCCondBitmapAddr, + llvm::Value *Val) { + if (!canEmitMCDCCoverage(Builder) || !RegionCondIDMap) + return; + + // Even though, for simplicity, parentheses and unary logical-NOT operators + // are considered part of their underlying condition for both MC/DC and + // branch coverage, the condition IDs themselves are assigned and tracked + // using the underlying condition itself. This is done solely for + // consistency since parentheses and logical-NOTs are ignored when checking + // whether the condition is actually an instrumentable condition. This can + // also make debugging a bit easier. + S = CodeGenFunction::stripCond(S); + + auto ExprMCDCConditionIDMapIterator = RegionCondIDMap->find(S); + if (ExprMCDCConditionIDMapIterator == RegionCondIDMap->end()) + return; + + // Extract the ID of the condition we are setting in the bitmap. + unsigned CondID = ExprMCDCConditionIDMapIterator->second; + assert(CondID > 0 && "Condition has no ID!"); + + auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); + + // Emit intrinsic that updates a dedicated temporary value on the stack after + // a condition is evaluated. After the set of conditions has been updated, + // the resulting value is used to update the boolean expression's bitmap. 
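// A conceptual sketch of the runtime bookkeeping described above, not the
// actual lowering (which is performed by LLVM's instrumentation pass); all
// names here are made up. Each decision keeps a small stack temporary that
// accumulates one bit per leaf condition as conditions evaluate, and the
// final value of that temporary identifies the executed test vector, which
// selects the bit to set in the decision's global bitmap.
#include <bitset>
#include <cstdio>
#include <string>

int main() {
  // Suppose a three-condition decision "a && (b || c)" executes with
  // a = true, b = false, c = true, and the conditions carry IDs 1, 2, 3.
  unsigned CondBitmap = 0; // the per-decision stack temporary
  auto recordCond = [&](unsigned ID, bool Val) {
    CondBitmap |= static_cast<unsigned>(Val) << (ID - 1);
  };
  recordCond(1, true);
  recordCond(2, false);
  recordCond(3, true);

  // The temporary now names the executed test vector; mark it as seen in the
  // decision's 2^3-bit bitmap.
  std::bitset<8> TVBitmap;
  TVBitmap.set(CondBitmap);
  std::printf("test vector index %u, bitmap %s\n", CondBitmap,
              TVBitmap.to_string().c_str());
}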
+ llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FunctionHash), + Builder.getInt32(CondID - 1), + MCDCCondBitmapAddr.getPointer(), Val}; + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_condbitmap_update), + Args); +} + void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) { if (CGM.getCodeGenOpts().hasProfileClangInstr()) M.addModuleFlag(llvm::Module::Warning, "EnableValueProfiling", diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h index 392ec5a144fe..6596b6c35277 100644 --- a/clang/lib/CodeGen/CodeGenPGO.h +++ b/clang/lib/CodeGen/CodeGenPGO.h @@ -33,8 +33,11 @@ private: std::array <unsigned, llvm::IPVK_Last + 1> NumValueSites; unsigned NumRegionCounters; + unsigned MCDCBitmapBytes; uint64_t FunctionHash; std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap; + std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionMCDCBitmapMap; + std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCondIDMap; std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap; std::unique_ptr<llvm::InstrProfRecord> ProfRecord; std::vector<uint64_t> RegionCounts; @@ -43,7 +46,8 @@ private: public: CodeGenPGO(CodeGenModule &CGModule) : CGM(CGModule), FuncNameVar(nullptr), NumValueSites({{0}}), - NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {} + NumRegionCounters(0), MCDCBitmapBytes(0), FunctionHash(0), + CurrentRegionCount(0) {} /// Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been @@ -103,10 +107,18 @@ private: bool IsInMainFile); bool skipRegionMappingForDecl(const Decl *D); void emitCounterRegionMapping(const Decl *D); + bool canEmitMCDCCoverage(const CGBuilderTy &Builder); public: void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, llvm::Value *StepV); + void emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, const Expr *S, + Address MCDCCondBitmapAddr); + void emitMCDCParameters(CGBuilderTy &Builder); + void emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S, + Address MCDCCondBitmapAddr); + void emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, + Address MCDCCondBitmapAddr, llvm::Value *Val); /// Return the region count for the counter at the given index. uint64_t getRegionCount(const Stmt *S) { diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 56411e2240e5..bf227386a71b 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -95,6 +95,8 @@ void CoverageSourceInfo::updateNextTokLoc(SourceLocation Loc) { } namespace { +using MCDCConditionID = CounterMappingRegion::MCDCConditionID; +using MCDCParameters = CounterMappingRegion::MCDCParameters; /// A region of source code that can be mapped to a counter. class SourceMappingRegion { @@ -104,6 +106,9 @@ class SourceMappingRegion { /// Secondary Counter used for Branch Regions for "False" branches. std::optional<Counter> FalseCount; + /// Parameters used for Modified Condition/Decision Coverage + MCDCParameters MCDCParams; + /// The region's starting location. 
std::optional<SourceLocation> LocStart; @@ -122,11 +127,18 @@ public: } SourceMappingRegion(Counter Count, std::optional<Counter> FalseCount, + MCDCParameters MCDCParams, std::optional<SourceLocation> LocStart, std::optional<SourceLocation> LocEnd, bool GapRegion = false) - : Count(Count), FalseCount(FalseCount), LocStart(LocStart), - LocEnd(LocEnd), GapRegion(GapRegion) {} + : Count(Count), FalseCount(FalseCount), MCDCParams(MCDCParams), + LocStart(LocStart), LocEnd(LocEnd), GapRegion(GapRegion) {} + + SourceMappingRegion(MCDCParameters MCDCParams, + std::optional<SourceLocation> LocStart, + std::optional<SourceLocation> LocEnd) + : MCDCParams(MCDCParams), LocStart(LocStart), LocEnd(LocEnd), + GapRegion(false) {} const Counter &getCounter() const { return Count; } @@ -163,6 +175,10 @@ public: void setGap(bool Gap) { GapRegion = Gap; } bool isBranch() const { return FalseCount.has_value(); } + + bool isMCDCDecision() const { return MCDCParams.NumConditions != 0; } + + const MCDCParameters &getMCDCParams() const { return MCDCParams; } }; /// Spelling locations for the start and end of a source region. @@ -454,8 +470,13 @@ public: SR.LineEnd, SR.ColumnEnd)); } else if (Region.isBranch()) { MappingRegions.push_back(CounterMappingRegion::makeBranchRegion( - Region.getCounter(), Region.getFalseCounter(), *CovFileID, - SR.LineStart, SR.ColumnStart, SR.LineEnd, SR.ColumnEnd)); + Region.getCounter(), Region.getFalseCounter(), + Region.getMCDCParams(), *CovFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); + } else if (Region.isMCDCDecision()) { + MappingRegions.push_back(CounterMappingRegion::makeDecisionRegion( + Region.getMCDCParams(), *CovFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); } else { MappingRegions.push_back(CounterMappingRegion::makeRegion( Region.getCounter(), *CovFileID, SR.LineStart, SR.ColumnStart, @@ -542,6 +563,239 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { } }; +/// A wrapper object for maintaining stacks to track the recursive AST visitor +/// walks for the purpose of assigning IDs to leaf-level conditions measured by +/// MC/DC. The object is created with a reference to the MCDCBitmapMap that was +/// created during the initial AST walk. The presence of a bitmap associated +/// with a boolean expression (top-level logical operator nest) indicates that +/// the boolean expression qualified for MC/DC. The resulting condition IDs +/// are preserved in a map reference that is also provided during object +/// creation. +struct MCDCCoverageBuilder { + + /// The AST walk recursively visits nested logical-AND or logical-OR binary + /// operator nodes and then visits their LHS and RHS children nodes. As this + /// happens, the algorithm will assign IDs to each operator's LHS and RHS side + /// as the walk moves deeper into the nest. At each level of the recursive + /// nest, the LHS and RHS may actually correspond to larger subtrees (not + /// leaf-conditions). If this is the case, when that node is visited, the ID + /// assigned to the subtree is re-assigned to its LHS, and a new ID is given + /// to its RHS. At the end of the walk, all leaf-level conditions will have a + /// unique ID -- keep in mind that the final set of IDs may not be in + /// numerical order from left to right.
+ /// + /// Example: "x = (A && B) || (C && D) || (D && F)" + /// + /// Visit Depth1: + /// (A && B) || (C && D) || (D && F) + /// ^-------LHS--------^ ^-RHS--^ + /// ID=1 ID=2 + /// + /// Visit LHS-Depth2: + /// (A && B) || (C && D) + /// ^-LHS--^ ^-RHS--^ + /// ID=1 ID=3 + /// + /// Visit LHS-Depth3: + /// (A && B) + /// LHS RHS + /// ID=1 ID=4 + /// + /// Visit RHS-Depth3: + /// (C && D) + /// LHS RHS + /// ID=3 ID=5 + /// + /// Visit RHS-Depth2: (D && F) + /// LHS RHS + /// ID=2 ID=6 + /// + /// Visit Depth1: + /// (A && B) || (C && D) || (D && F) + /// ID=1 ID=4 ID=3 ID=5 ID=2 ID=6 + /// + /// A node ID of '0' always means MC/DC isn't being tracked. + /// + /// As the AST walk proceeds recursively, the algorithm will also use stacks + /// to track the IDs of logical-AND and logical-OR operations on the RHS so + /// that it can be determined which nodes are executed next, depending on how + /// a LHS or RHS of a logical-AND or logical-OR is evaluated. This + /// information relies on the assigned IDs and are embedded within the + /// coverage region IDs of each branch region associated with a leaf-level + /// condition. This information helps the visualization tool reconstruct all + /// possible test vectors for the purposes of MC/DC analysis. if a "next" node + /// ID is '0', it means it's the end of the test vector. The following rules + /// are used: + /// + /// For logical-AND ("LHS && RHS"): + /// - If LHS is TRUE, execution goes to the RHS node. + /// - If LHS is FALSE, execution goes to the LHS node of the next logical-OR. + /// If that does not exist, execution exits (ID == 0). + /// + /// - If RHS is TRUE, execution goes to LHS node of the next logical-AND. + /// If that does not exist, execution exits (ID == 0). + /// - If RHS is FALSE, execution goes to the LHS node of the next logical-OR. + /// If that does not exist, execution exits (ID == 0). + /// + /// For logical-OR ("LHS || RHS"): + /// - If LHS is TRUE, execution goes to the LHS node of the next logical-AND. + /// If that does not exist, execution exits (ID == 0). + /// - If LHS is FALSE, execution goes to the RHS node. + /// + /// - If RHS is TRUE, execution goes to LHS node of the next logical-AND. + /// If that does not exist, execution exits (ID == 0). + /// - If RHS is FALSE, execution goes to the LHS node of the next logical-OR. + /// If that does not exist, execution exits (ID == 0). + /// + /// Finally, the condition IDs are also used when instrumenting the code to + /// indicate a unique offset into a temporary bitmap that represents the true + /// or false evaluation of that particular condition. + /// + /// NOTE regarding the use of CodeGenFunction::stripCond(). Even though, for + /// simplicity, parentheses and unary logical-NOT operators are considered + /// part of their underlying condition for both MC/DC and branch coverage, the + /// condition IDs themselves are assigned and tracked using the underlying + /// condition itself. This is done solely for consistency since parentheses + /// and logical-NOTs are ignored when checking whether the condition is + /// actually an instrumentable condition. This can also make debugging a bit + /// easier. 
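// A toy, self-contained model of the ID-assignment walk documented above (the
// Node type and assignIDs helper are made up for illustration): when an
// operator node already carries an ID, that ID moves to its LHS subtree and
// the RHS subtree receives the next fresh ID. Run on the documented example
// "(A && B) || (C && D) || (D && F)" it reproduces the final leaf IDs
// A=1 B=4 C=3 D=5 D=2 F=6.
#include <cstdio>
#include <initializer_list>
#include <map>
#include <string>

struct Node {
  std::string Name; // non-empty for leaf-level conditions
  Node *LHS = nullptr, *RHS = nullptr;
};

static void assignIDs(Node *N, std::map<const Node *, unsigned> &IDs,
                      unsigned &NextID) {
  if (!N->LHS) // leaf condition: nothing to split further
    return;
  // Re-assign the operator's own ID (a fresh ID at the root) to its LHS
  // subtree; the RHS subtree always gets a new ID.
  IDs[N->LHS] = IDs.count(N) ? IDs[N] : NextID++;
  IDs[N->RHS] = NextID++;
  assignIDs(N->LHS, IDs, NextID);
  assignIDs(N->RHS, IDs, NextID);
}

int main() {
  Node A{"A"}, B{"B"}, C{"C"}, D1{"D"}, D2{"D"}, F{"F"};
  Node AndAB{"", &A, &B}, AndCD{"", &C, &D1}, AndDF{"", &D2, &F};
  Node Or1{"", &AndAB, &AndCD}; // (A && B) || (C && D)
  Node Root{"", &Or1, &AndDF};  // ... || (D && F)

  std::map<const Node *, unsigned> IDs;
  unsigned NextID = 1;
  assignIDs(&Root, IDs, NextID);

  for (const Node *Leaf : {&A, &B, &C, &D1, &D2, &F})
    std::printf("%s -> ID %u\n", Leaf->Name.c_str(), IDs[Leaf]);
}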
+ +private: + CodeGenModule &CGM; + + llvm::SmallVector<MCDCConditionID> AndRHS; + llvm::SmallVector<MCDCConditionID> OrRHS; + llvm::SmallVector<const BinaryOperator *> NestLevel; + llvm::DenseMap<const Stmt *, MCDCConditionID> &CondIDs; + llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap; + MCDCConditionID NextID = 1; + bool NotMapped = false; + + /// Is this a logical-AND operation? + bool isLAnd(const BinaryOperator *E) const { + return E->getOpcode() == BO_LAnd; + } + + /// Push an ID onto the corresponding RHS stack. + void pushRHS(const BinaryOperator *E) { + llvm::SmallVector<MCDCConditionID> &rhs = isLAnd(E) ? AndRHS : OrRHS; + rhs.push_back(CondIDs[CodeGenFunction::stripCond(E->getRHS())]); + } + + /// Pop an ID from the corresponding RHS stack. + void popRHS(const BinaryOperator *E) { + llvm::SmallVector<MCDCConditionID> &rhs = isLAnd(E) ? AndRHS : OrRHS; + if (!rhs.empty()) + rhs.pop_back(); + } + + /// If the expected ID is on top, pop it off the corresponding RHS stack. + void popRHSifTop(const BinaryOperator *E) { + if (!OrRHS.empty() && CondIDs[E] == OrRHS.back()) + OrRHS.pop_back(); + else if (!AndRHS.empty() && CondIDs[E] == AndRHS.back()) + AndRHS.pop_back(); + } + +public: + MCDCCoverageBuilder(CodeGenModule &CGM, + llvm::DenseMap<const Stmt *, MCDCConditionID> &CondIDMap, + llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap) + : CGM(CGM), CondIDs(CondIDMap), MCDCBitmapMap(MCDCBitmapMap) {} + + /// Return the ID of the RHS of the next, upper nest-level logical-OR. + MCDCConditionID getNextLOrCondID() const { + return OrRHS.empty() ? 0 : OrRHS.back(); + } + + /// Return the ID of the RHS of the next, upper nest-level logical-AND. + MCDCConditionID getNextLAndCondID() const { + return AndRHS.empty() ? 0 : AndRHS.back(); + } + + /// Return the ID of a given condition. + MCDCConditionID getCondID(const Expr *Cond) const { + auto I = CondIDs.find(CodeGenFunction::stripCond(Cond)); + if (I == CondIDs.end()) + return 0; + else + return I->second; + } + + /// Push the binary operator statement to track the nest level and assign IDs + /// to the operator's LHS and RHS. The RHS may be a larger subtree that is + /// broken up on successive levels. + void pushAndAssignIDs(const BinaryOperator *E) { + if (!CGM.getCodeGenOpts().MCDCCoverage) + return; + + // If binary expression is disqualified, don't do mapping. + if (NestLevel.empty() && MCDCBitmapMap.find(CodeGenFunction::stripCond( + E)) == MCDCBitmapMap.end()) + NotMapped = true; + + // Push Stmt on 'NestLevel' stack to keep track of nest location. + NestLevel.push_back(E); + + // Don't go any further if we don't need to map condition IDs. + if (NotMapped) + return; + + // If the operator itself has an assigned ID, this means it represents a + // larger subtree. In this case, pop its ID out of the RHS stack and + // assign that ID to its LHS node. Its RHS will receive a new ID. + if (CondIDs.find(CodeGenFunction::stripCond(E)) != CondIDs.end()) { + // If Stmt has an ID, assign its ID to LHS + CondIDs[CodeGenFunction::stripCond(E->getLHS())] = CondIDs[E]; + + // Since the operator's LHS assumes the operator's same ID, pop the + // operator from the RHS stack so that if LHS short-circuits, it won't be + // incorrectly re-used as the node executed next. + popRHSifTop(E); + } else { + // Otherwise, assign ID+1 to LHS. + CondIDs[CodeGenFunction::stripCond(E->getLHS())] = NextID++; + } + + // Assign ID+1 to RHS. 
+ CondIDs[CodeGenFunction::stripCond(E->getRHS())] = NextID++; + + // Push ID of Stmt's RHS so that LHS nodes know about it + pushRHS(E); + } + + /// Pop the binary operator from the nest-level stack. If the walk is at the top of + /// the nest, assign the total number of conditions. + unsigned popAndReturnCondCount(const BinaryOperator *E) { + if (!CGM.getCodeGenOpts().MCDCCoverage) + return 0; + + unsigned TotalConds = 0; + + // Pop Stmt from 'NestLevel' stack. + assert(NestLevel.back() == E); + NestLevel.pop_back(); + + // Reset state if not doing mapping. + if (NestLevel.empty() && NotMapped) { + NotMapped = false; + return 0; + } + + // Pop RHS ID. + popRHS(E); + + // If at the parent (NestLevel=0), set conds and reset. + if (NestLevel.empty()) { + TotalConds = NextID - 1; + + // Reset ID back to beginning. + NextID = 1; + } + return TotalConds; + } +}; + /// A StmtVisitor that creates coverage mapping regions which map /// from the source code locations to the PGO counters. struct CounterCoverageMappingBuilder @@ -550,8 +804,14 @@ struct CounterCoverageMappingBuilder /// The map of statements to count values. llvm::DenseMap<const Stmt *, unsigned> &CounterMap; + /// The map of statements to bitmap coverage object values. + llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap; + /// A stack of currently live regions. - std::vector<SourceMappingRegion> RegionStack; + llvm::SmallVector<SourceMappingRegion> RegionStack; + + /// An object to manage MCDC regions. + MCDCCoverageBuilder MCDCBuilder; CounterExpressionBuilder Builder; @@ -589,6 +849,8 @@ struct CounterCoverageMappingBuilder return Counter::getCounter(CounterMap[S]); } + unsigned getRegionBitmap(const Stmt *S) { return MCDCBitmapMap[S]; } + /// Push a region onto the stack. /// /// Returns the index on the stack where the region was pushed. This can be @@ -596,7 +858,9 @@ struct CounterCoverageMappingBuilder size_t pushRegion(Counter Count, std::optional<SourceLocation> StartLoc = std::nullopt, std::optional<SourceLocation> EndLoc = std::nullopt, - std::optional<Counter> FalseCount = std::nullopt) { + std::optional<Counter> FalseCount = std::nullopt, + MCDCConditionID ID = 0, MCDCConditionID TrueID = 0, + MCDCConditionID FalseID = 0) { if (StartLoc && !FalseCount) { MostRecentLocation = *StartLoc; @@ -615,7 +879,19 @@ struct CounterCoverageMappingBuilder StartLoc = std::nullopt; if (EndLoc && EndLoc->isInvalid()) EndLoc = std::nullopt; - RegionStack.emplace_back(Count, FalseCount, StartLoc, EndLoc); + RegionStack.emplace_back(Count, FalseCount, + MCDCParameters{0, 0, ID, TrueID, FalseID}, + StartLoc, EndLoc); + + return RegionStack.size() - 1; + } + + size_t pushRegion(unsigned BitmapIdx, unsigned Conditions, + std::optional<SourceLocation> StartLoc = std::nullopt, + std::optional<SourceLocation> EndLoc = std::nullopt) { + + RegionStack.emplace_back(MCDCParameters{BitmapIdx, Conditions}, StartLoc, + EndLoc); return RegionStack.size() - 1; } @@ -746,7 +1022,9 @@ struct CounterCoverageMappingBuilder /// and add it to the function's SourceRegions. A branch region tracks a /// "True" counter and a "False" counter for boolean expressions that /// result in the generation of a branch. - void createBranchRegion(const Expr *C, Counter TrueCnt, Counter FalseCnt) { + void createBranchRegion(const Expr *C, Counter TrueCnt, Counter FalseCnt, + MCDCConditionID ID = 0, MCDCConditionID TrueID = 0, + MCDCConditionID FalseID = 0) { // Check for NULL conditions.
if (!C) return; @@ -764,13 +1042,21 @@ struct CounterCoverageMappingBuilder // CodeGenFunction.c always returns false, but that is very heavy-handed. if (ConditionFoldsToBool(C)) popRegions(pushRegion(Counter::getZero(), getStart(C), getEnd(C), - Counter::getZero())); + Counter::getZero(), ID, TrueID, FalseID)); else // Otherwise, create a region with the True counter and False counter. - popRegions(pushRegion(TrueCnt, getStart(C), getEnd(C), FalseCnt)); + popRegions(pushRegion(TrueCnt, getStart(C), getEnd(C), FalseCnt, ID, + TrueID, FalseID)); } } + /// Create a Decision Region with a BitmapIdx and number of Conditions. This + /// type of region "contains" branch regions, one for each of the conditions. + /// The visualization tool will group everything together. + void createDecisionRegion(const Expr *C, unsigned BitmapIdx, unsigned Conds) { + popRegions(pushRegion(BitmapIdx, Conds, getStart(C), getEnd(C))); + } + /// Create a Branch Region around a SwitchCase for code coverage /// and add it to the function's SourceRegions. void createSwitchCaseRegion(const SwitchCase *SC, Counter TrueCnt, @@ -851,8 +1137,12 @@ struct CounterCoverageMappingBuilder // we've seen this region. if (StartLocs.insert(Loc).second) { if (I.isBranch()) - SourceRegions.emplace_back(I.getCounter(), I.getFalseCounter(), Loc, - getEndOfFileOrMacro(Loc), I.isBranch()); + SourceRegions.emplace_back( + I.getCounter(), I.getFalseCounter(), + MCDCParameters{0, 0, I.getMCDCParams().ID, + I.getMCDCParams().TrueID, + I.getMCDCParams().FalseID}, + Loc, getEndOfFileOrMacro(Loc), I.isBranch()); else SourceRegions.emplace_back(I.getCounter(), Loc, getEndOfFileOrMacro(Loc)); @@ -971,9 +1261,13 @@ struct CounterCoverageMappingBuilder CounterCoverageMappingBuilder( CoverageMappingModuleGen &CVM, - llvm::DenseMap<const Stmt *, unsigned> &CounterMap, SourceManager &SM, - const LangOptions &LangOpts) - : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap) {} + llvm::DenseMap<const Stmt *, unsigned> &CounterMap, + llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap, + llvm::DenseMap<const Stmt *, MCDCConditionID> &CondIDMap, + SourceManager &SM, const LangOptions &LangOpts) + : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap), + MCDCBitmapMap(MCDCBitmapMap), + MCDCBuilder(CVM.getCodeGenModule(), CondIDMap, MCDCBitmapMap) {} /// Write the mapping data to the output stream void write(llvm::raw_ostream &OS) { @@ -1519,6 +1813,9 @@ struct CounterCoverageMappingBuilder } void VisitBinLAnd(const BinaryOperator *E) { + // Keep track of Binary Operator and assign MCDC condition IDs + MCDCBuilder.pushAndAssignIDs(E); + extendRegion(E->getLHS()); propagateCounts(getRegion().getCounter(), E->getLHS()); handleFileExit(getEnd(E->getLHS())); @@ -1527,6 +1824,11 @@ struct CounterCoverageMappingBuilder extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); + // Process Binary Operator and create MCDC Decision Region if top-level + unsigned NumConds = 0; + if ((NumConds = MCDCBuilder.popAndReturnCondCount(E))) + createDecisionRegion(E, getRegionBitmap(E), NumConds); + // Extract the RHS's Execution Counter. Counter RHSExecCnt = getRegionCounter(E); @@ -1536,13 +1838,30 @@ struct CounterCoverageMappingBuilder // Extract the Parent Region Counter. Counter ParentCnt = getRegion().getCounter(); + // Extract the MCDC condition IDs (returns 0 if not needed). 
+ MCDCConditionID NextOrID = MCDCBuilder.getNextLOrCondID(); + MCDCConditionID NextAndID = MCDCBuilder.getNextLAndCondID(); + MCDCConditionID LHSid = MCDCBuilder.getCondID(E->getLHS()); + MCDCConditionID RHSid = MCDCBuilder.getCondID(E->getRHS()); + // Create Branch Region around LHS condition. + // MC/DC: For "LHS && RHS" + // - If LHS is TRUE, execution goes to the RHS. + // - If LHS is FALSE, execution goes to the LHS of the next logical-OR. + // If that does not exist, execution exits (ID == 0). createBranchRegion(E->getLHS(), RHSExecCnt, - subtractCounters(ParentCnt, RHSExecCnt)); + subtractCounters(ParentCnt, RHSExecCnt), LHSid, RHSid, + NextOrID); // Create Branch Region around RHS condition. + // MC/DC: For "LHS && RHS" + // - If RHS is TRUE, execution goes to LHS of the next logical-AND. + // If that does not exist, execution exits (ID == 0). + // - If RHS is FALSE, execution goes to the LHS of the next logical-OR. + // If that does not exist, execution exits (ID == 0). createBranchRegion(E->getRHS(), RHSTrueCnt, - subtractCounters(RHSExecCnt, RHSTrueCnt)); + subtractCounters(RHSExecCnt, RHSTrueCnt), RHSid, + NextAndID, NextOrID); } // Determine whether the right side of OR operation need to be visited. @@ -1556,6 +1875,9 @@ struct CounterCoverageMappingBuilder } void VisitBinLOr(const BinaryOperator *E) { + // Keep track of Binary Operator and assign MCDC condition IDs + MCDCBuilder.pushAndAssignIDs(E); + extendRegion(E->getLHS()); Counter OutCount = propagateCounts(getRegion().getCounter(), E->getLHS()); handleFileExit(getEnd(E->getLHS())); @@ -1564,6 +1886,11 @@ struct CounterCoverageMappingBuilder extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); + // Process Binary Operator and create MCDC Decision Region if top-level + unsigned NumConds = 0; + if ((NumConds = MCDCBuilder.popAndReturnCondCount(E))) + createDecisionRegion(E, getRegionBitmap(E), NumConds); + // Extract the RHS's Execution Counter. Counter RHSExecCnt = getRegionCounter(E); @@ -1577,13 +1904,28 @@ struct CounterCoverageMappingBuilder // Extract the Parent Region Counter. Counter ParentCnt = getRegion().getCounter(); + // Extract the MCDC condition IDs (returns 0 if not needed). + MCDCConditionID NextOrID = MCDCBuilder.getNextLOrCondID(); + MCDCConditionID NextAndID = MCDCBuilder.getNextLAndCondID(); + MCDCConditionID LHSid = MCDCBuilder.getCondID(E->getLHS()); + MCDCConditionID RHSid = MCDCBuilder.getCondID(E->getRHS()); + // Create Branch Region around LHS condition. + // MC/DC: For "LHS || RHS" + // - If LHS is TRUE, execution goes to the LHS of the next logical-AND. + // If that does not exist, execution exits (ID == 0). + // - If LHS is FALSE, execution goes to the RHS. createBranchRegion(E->getLHS(), subtractCounters(ParentCnt, RHSExecCnt), - RHSExecCnt); + RHSExecCnt, LHSid, NextAndID, RHSid); // Create Branch Region around RHS condition. + // MC/DC: For "LHS || RHS" + // - If RHS is TRUE, execution goes to LHS of the next logical-AND. + // If that does not exist, execution exits (ID == 0). + // - If RHS is FALSE, execution goes to the LHS of the next logical-OR. + // If that does not exist, execution exits (ID == 0). 
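Applying the LAnd/LOr successor rules spelled out in the comments above to the same example decision `(a && b) || c` gives the per-condition [ID, TrueID, FalseID] triples below. This is an illustrative sketch derived from the code above; 0 means the decision is exited.

// Sketch of the MC/DC branch-region parameters for `(a && b) || c`:
//   a (ID 1): TrueID = 3 (fall through to b), FalseID = 2 (RHS of the next `||`, i.e. c)
//   b (ID 3): TrueID = 0 (decision is true, exit), FalseID = 2 (try c)
//   c (ID 2): TrueID = 0 (exit),                   FalseID = 0 (exit)
// The enclosing decision region carries the bitmap index and C:3 conditions.
bool g(bool a, bool b, bool c) { return (a && b) || c; }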
createBranchRegion(E->getRHS(), subtractCounters(RHSExecCnt, RHSFalseCnt), - RHSFalseCnt); + RHSFalseCnt, RHSid, NextAndID, NextOrID); } void VisitLambdaExpr(const LambdaExpr *LE) { @@ -1633,11 +1975,23 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, OS << "File " << R.FileID << ", " << R.LineStart << ":" << R.ColumnStart << " -> " << R.LineEnd << ":" << R.ColumnEnd << " = "; - Ctx.dump(R.Count, OS); - if (R.Kind == CounterMappingRegion::BranchRegion) { - OS << ", "; - Ctx.dump(R.FalseCount, OS); + if (R.Kind == CounterMappingRegion::MCDCDecisionRegion) { + OS << "M:" << R.MCDCParams.BitmapIdx; + OS << ", C:" << R.MCDCParams.NumConditions; + } else { + Ctx.dump(R.Count, OS); + + if (R.Kind == CounterMappingRegion::BranchRegion || + R.Kind == CounterMappingRegion::MCDCBranchRegion) { + OS << ", "; + Ctx.dump(R.FalseCount, OS); + } + } + + if (R.Kind == CounterMappingRegion::MCDCBranchRegion) { + OS << " [" << R.MCDCParams.ID << "," << R.MCDCParams.TrueID; + OS << "," << R.MCDCParams.FalseID << "] "; } if (R.Kind == CounterMappingRegion::ExpansionRegion) @@ -1846,8 +2200,9 @@ unsigned CoverageMappingModuleGen::getFileID(FileEntryRef File) { void CoverageMappingGen::emitCounterMapping(const Decl *D, llvm::raw_ostream &OS) { - assert(CounterMap); - CounterCoverageMappingBuilder Walker(CVM, *CounterMap, SM, LangOpts); + assert(CounterMap && MCDCBitmapMap); + CounterCoverageMappingBuilder Walker(CVM, *CounterMap, *MCDCBitmapMap, + *CondIDMap, SM, LangOpts); Walker.VisitDecl(D); Walker.write(OS); } diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h index 77d7c6cd87cf..62cea173c9fc 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.h +++ b/clang/lib/CodeGen/CoverageMappingGen.h @@ -150,16 +150,22 @@ class CoverageMappingGen { SourceManager &SM; const LangOptions &LangOpts; llvm::DenseMap<const Stmt *, unsigned> *CounterMap; + llvm::DenseMap<const Stmt *, unsigned> *MCDCBitmapMap; + llvm::DenseMap<const Stmt *, unsigned> *CondIDMap; public: CoverageMappingGen(CoverageMappingModuleGen &CVM, SourceManager &SM, const LangOptions &LangOpts) - : CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(nullptr) {} + : CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(nullptr), + MCDCBitmapMap(nullptr), CondIDMap(nullptr) {} CoverageMappingGen(CoverageMappingModuleGen &CVM, SourceManager &SM, const LangOptions &LangOpts, - llvm::DenseMap<const Stmt *, unsigned> *CounterMap) - : CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(CounterMap) {} + llvm::DenseMap<const Stmt *, unsigned> *CounterMap, + llvm::DenseMap<const Stmt *, unsigned> *MCDCBitmapMap, + llvm::DenseMap<const Stmt *, unsigned> *CondIDMap) + : CVM(CVM), SM(SM), LangOpts(LangOpts), CounterMap(CounterMap), + MCDCBitmapMap(MCDCBitmapMap), CondIDMap(CondIDMap) {} /// Emit the coverage mapping data which maps the regions of /// code to counters that will be used to find the execution diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 2af240350438..d053f41ab168 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -1797,6 +1797,9 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, } else if (k == BuiltinType::Float || k == BuiltinType::Double || k == BuiltinType::Float16 || k == BuiltinType::BFloat16) { Current = SSE; + } else if (k == BuiltinType::Float128) { + Lo = SSE; + Hi = SSEUp; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF 
== &llvm::APFloat::IEEEquad()) { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 9b2f2a374809..1889ea28079d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1430,6 +1430,17 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { const ToolChain &TC = getToolChain( *UArgs, computeTargetTriple(*this, TargetTriple, *UArgs)); + if (TC.getTriple().isAndroid()) { + llvm::Triple Triple = TC.getTriple(); + StringRef TripleVersionName = Triple.getEnvironmentVersionString(); + + if (Triple.getEnvironmentVersion().empty() && TripleVersionName != "") { + Diags.Report(diag::err_drv_triple_version_invalid) + << TripleVersionName << TC.getTripleString(); + ContainsError = true; + } + } + // Report warning when arm64EC option is overridden by specified target if ((TC.getTriple().getArch() != llvm::Triple::aarch64 || TC.getTriple().getSubArch() != llvm::Triple::AArch64SubArch_arm64ec) && diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp index f9f14c01b2b9..fe9d112b8800 100644 --- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp @@ -221,6 +221,7 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple, bool IsN64 = ABIName == "64"; bool IsPIC = false; bool NonPIC = false; + bool HasNaN2008Opt = false; Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, options::OPT_fpic, options::OPT_fno_pic, @@ -285,9 +286,10 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple, if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { StringRef Val = StringRef(A->getValue()); if (Val == "2008") { - if (mips::getIEEE754Standard(CPUName) & mips::Std2008) + if (mips::getIEEE754Standard(CPUName) & mips::Std2008) { Features.push_back("+nan2008"); - else { + HasNaN2008Opt = true; + } else { Features.push_back("-nan2008"); D.Diag(diag::warn_target_unsupported_nan2008) << CPUName; } @@ -323,6 +325,8 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple, D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } + } else if (HasNaN2008Opt) { + Features.push_back("+abs2008"); } AddTargetFeature(Args, Features, options::OPT_msingle_float, diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index 0717e3b813e1..16a8b3cc42ba 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -42,9 +42,9 @@ static bool getArchFeatures(const Driver &D, StringRef Arch, return false; } - (*ISAInfo)->toFeatures( - Features, [&Args](const Twine &Str) { return Args.MakeArgString(Str); }, - /*AddAllExtensions=*/true); + for (const std::string &Str : (*ISAInfo)->toFeatures(/*AddAllExtension=*/true, + /*IgnoreUnknown=*/false)) + Features.push_back(Args.MakeArgString(Str)); if (EnableExperimentalExtensions) Features.push_back(Args.MakeArgString("+experimental")); diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 42c8336e626c..391c47f88bde 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -293,9 +293,8 @@ void BareMetal::addClangTargetOptions(const ArgList &DriverArgs, void BareMetal::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(options::OPT_nostdinc) || - DriverArgs.hasArg(options::OPT_nostdlibinc) || - 
DriverArgs.hasArg(options::OPT_nostdincxx)) + if (DriverArgs.hasArg(options::OPT_nostdinc, options::OPT_nostdlibinc, + options::OPT_nostdincxx)) return; const Driver &D = getDriver(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index acfa11980506..2d8ef841d4f6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -698,6 +698,17 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, CmdArgs.push_back("-fcoverage-mapping"); } + if (Args.hasFlag(options::OPT_fmcdc_coverage, options::OPT_fno_mcdc_coverage, + false)) { + if (!Args.hasFlag(options::OPT_fcoverage_mapping, + options::OPT_fno_coverage_mapping, false)) + D.Diag(clang::diag::err_drv_argument_only_allowed_with) + << "-fcoverage-mcdc" + << "-fcoverage-mapping"; + + CmdArgs.push_back("-fcoverage-mcdc"); + } + if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ, options::OPT_fcoverage_compilation_dir_EQ)) { if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ)) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index a610a94a39a2..24681dfdc99c 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2251,6 +2251,15 @@ void Generic_GCC::GCCInstallationDetector::init( return; } + // If --gcc-triple is specified use this instead of trying to + // auto-detect a triple. + if (const Arg *A = + Args.getLastArg(clang::driver::options::OPT_gcc_triple_EQ)) { + StringRef GCCTriple = A->getValue(); + CandidateTripleAliases.clear(); + CandidateTripleAliases.push_back(GCCTriple); + } + // Compute the set of prefixes for our search. SmallVector<std::string, 8> Prefixes; StringRef GCCToolchainDir = getGCCToolchainDir(Args, D.SysRoot); diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 65512f16357d..18fc9d4b6807 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -471,12 +471,23 @@ findClangRelativeSysroot(const Driver &D, const llvm::Triple &LiteralTriple, return make_error_code(std::errc::no_such_file_or_directory); } +static bool looksLikeMinGWSysroot(const std::string &Directory) { + StringRef Sep = llvm::sys::path::get_separator(); + if (!llvm::sys::fs::exists(Directory + Sep + "include" + Sep + "_mingw.h")) + return false; + if (!llvm::sys::fs::exists(Directory + Sep + "lib" + Sep + "libkernel32.a")) + return false; + return true; +} + toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args), RocmInstallation(D, Triple, Args) { getProgramPaths().push_back(getDriver().getInstalledDir()); + std::string InstallBase = + std::string(llvm::sys::path::parent_path(getDriver().getInstalledDir())); // The sequence for detecting a sysroot here should be kept in sync with // the testTriple function below. llvm::Triple LiteralTriple = getLiteralTriple(D, getTriple()); @@ -487,13 +498,17 @@ toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, else if (llvm::ErrorOr<std::string> TargetSubdir = findClangRelativeSysroot( getDriver(), LiteralTriple, getTriple(), SubdirName)) Base = std::string(llvm::sys::path::parent_path(TargetSubdir.get())); + // If the install base of Clang seems to have mingw sysroot files directly + // in the toplevel include and lib directories, use this as base instead of + // looking for a triple prefixed GCC in the path. 
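For reference, the layout accepted by the probe introduced above can be restated in portable C++. This is a standalone sketch using std::filesystem rather than llvm::sys; the two marker files are exactly the ones looksLikeMinGWSysroot checks.

#include <filesystem>
#include <string>

// A Clang install base counts as a mingw sysroot if it contains
// <base>/include/_mingw.h and <base>/lib/libkernel32.a.
static bool looksLikeMinGWSysrootStandalone(const std::string &Base) {
  namespace fs = std::filesystem;
  return fs::exists(fs::path(Base) / "include" / "_mingw.h") &&
         fs::exists(fs::path(Base) / "lib" / "libkernel32.a");
}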
+ else if (looksLikeMinGWSysroot(InstallBase)) + Base = InstallBase; else if (llvm::ErrorOr<std::string> GPPName = findGcc(LiteralTriple, getTriple())) Base = std::string(llvm::sys::path::parent_path( llvm::sys::path::parent_path(GPPName.get()))); else - Base = std::string( - llvm::sys::path::parent_path(getDriver().getInstalledDir())); + Base = InstallBase; Base += llvm::sys::path::get_separator(); findGccLibDir(LiteralTriple); @@ -778,9 +793,15 @@ static bool testTriple(const Driver &D, const llvm::Triple &Triple, if (D.SysRoot.size()) return true; llvm::Triple LiteralTriple = getLiteralTriple(D, Triple); + std::string InstallBase = + std::string(llvm::sys::path::parent_path(D.getInstalledDir())); if (llvm::ErrorOr<std::string> TargetSubdir = findClangRelativeSysroot(D, LiteralTriple, Triple, SubdirName)) return true; + // If the install base itself looks like a mingw sysroot, we'll use that + // - don't use any potentially unrelated gcc to influence what triple to use. + if (looksLikeMinGWSysroot(InstallBase)) + return false; if (llvm::ErrorOr<std::string> GPPName = findGcc(LiteralTriple, Triple)) return true; // If we neither found a colocated sysroot or a matching gcc executable, diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3ac3aa3c5e3a..8b43438c72df 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -5151,6 +5151,14 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; if (Left.IsUnterminatedLiteral) return true; + // FIXME: Breaking after newlines seems useful in general. Turn this into an + // option and recognize more cases like endl etc, and break independent of + // what comes after operator lessless. + if (Right.is(tok::lessless) && Right.Next && + Right.Next->is(tok::string_literal) && Left.is(tok::string_literal) && + Left.TokenText.ends_with("\\n\"")) { + return true; + } if (Right.is(TT_RequiresClause)) { switch (Style.RequiresClausePosition) { case FormatStyle::RCPS_OwnLine: diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 61d80258d166..9aae2285aeb1 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -756,6 +756,65 @@ __arm_st64bv0(void *__addr, data512_t __value) { __builtin_arm_mops_memset_tag(__tagged_address, __value, __size) #endif +/* Coprocessor Intrinsics */ +#if defined(__ARM_FEATURE_COPROC) + +#if (__ARM_FEATURE_COPROC & 0x1) + +#if (__ARM_ARCH < 8) +#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \ + __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) +#endif /* __ARM_ARCH < 8 */ + +#define __arm_ldc(coproc, CRd, p) __builtin_arm_ldc(coproc, CRd, p) +#define __arm_stc(coproc, CRd, p) __builtin_arm_stc(coproc, CRd, p) + +#define __arm_mcr(coproc, opc1, value, CRn, CRm, opc2) \ + __builtin_arm_mcr(coproc, opc1, value, CRn, CRm, opc2) +#define __arm_mrc(coproc, opc1, CRn, CRm, opc2) \ + __builtin_arm_mrc(coproc, opc1, CRn, CRm, opc2) + +#if (__ARM_ARCH != 4) && (__ARM_ARCH < 8) +#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p) +#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p) +#endif /* (__ARM_ARCH != 4) && (__ARM_ARCH != 8) */ + +#if (__ARM_ARCH_8M_MAIN__) || (__ARM_ARCH_8_1M_MAIN__) +#define __arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) \ + __builtin_arm_cdp(coproc, opc1, CRd, CRn, CRm, opc2) +#define __arm_ldcl(coproc, CRd, p) __builtin_arm_ldcl(coproc, CRd, p) +#define __arm_stcl(coproc, CRd, p) __builtin_arm_stcl(coproc, CRd, p) +#endif /* 
___ARM_ARCH_8M_MAIN__ */ + +#endif /* __ARM_FEATURE_COPROC & 0x1 */ + +#if (__ARM_FEATURE_COPROC & 0x2) +#define __arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) \ + __builtin_arm_cdp2(coproc, opc1, CRd, CRn, CRm, opc2) +#define __arm_ldc2(coproc, CRd, p) __builtin_arm_ldc2(coproc, CRd, p) +#define __arm_stc2(coproc, CRd, p) __builtin_arm_stc2(coproc, CRd, p) +#define __arm_ldc2l(coproc, CRd, p) __builtin_arm_ldc2l(coproc, CRd, p) +#define __arm_stc2l(coproc, CRd, p) __builtin_arm_stc2l(coproc, CRd, p) +#define __arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) \ + __builtin_arm_mcr2(coproc, opc1, value, CRn, CRm, opc2) +#define __arm_mrc2(coproc, opc1, CRn, CRm, opc2) \ + __builtin_arm_mrc2(coproc, opc1, CRn, CRm, opc2) +#endif + +#if (__ARM_FEATURE_COPROC & 0x4) +#define __arm_mcrr(coproc, opc1, value, CRm) \ + __builtin_arm_mcrr(coproc, opc1, value, CRm) +#define __arm_mrrc(coproc, opc1, CRm) __builtin_arm_mrrc(coproc, opc1, CRm) +#endif + +#if (__ARM_FEATURE_COPROC & 0x8) +#define __arm_mcrr2(coproc, opc1, value, CRm) \ + __builtin_arm_mcrr2(coproc, opc1, value, CRm) +#define __arm_mrrc2(coproc, opc1, CRm) __builtin_arm_mrrc2(coproc, opc1, CRm) +#endif + +#endif // __ARM_FEATURE_COPROC + /* Transactional Memory Extension (TME) Intrinsics */ #if defined(__ARM_FEATURE_TME) && __ARM_FEATURE_TME diff --git a/clang/lib/Headers/llvm_libc_wrappers/stdio.h b/clang/lib/Headers/llvm_libc_wrappers/stdio.h index 0870f3e741ec..950f91b3763e 100644 --- a/clang/lib/Headers/llvm_libc_wrappers/stdio.h +++ b/clang/lib/Headers/llvm_libc_wrappers/stdio.h @@ -6,15 +6,41 @@ // //===----------------------------------------------------------------------===// -#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ -#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ - #if !defined(_OPENMP) && !defined(__HIP__) && !defined(__CUDA__) #error "This file is for GPU offloading compilation only" #endif #include_next <stdio.h> +// In some old versions of glibc, other standard headers sometimes define +// special macros (e.g., __need_FILE) before including stdio.h to cause stdio.h +// to produce special definitions. Future includes of stdio.h when those +// special macros are undefined are expected to produce the normal definitions +// from stdio.h. +// +// We do not apply our include guard (__CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__) +// unconditionally to the above include_next. Otherwise, after an occurrence of +// the first glibc stdio.h use case described above, the include_next would be +// skipped for remaining includes of stdio.h, leaving required symbols +// undefined. +// +// We make the following assumptions to handle all use cases: +// +// 1. If the above include_next produces special glibc definitions, then (a) it +// does not produce the normal definitions that we must intercept below, (b) +// the current file was included from a glibc header that already defined +// __GLIBC__ (usually by including glibc's <features.h>), and (c) the above +// include_next does not define _STDIO_H. In that case, we skip the rest of +// the current file and don't guard against future includes. +// 2. If the above include_next produces the normal stdio.h definitions, then +// either (a) __GLIBC__ is not defined because C headers are from some other +// libc implementation or (b) the above include_next defines _STDIO_H to +// prevent the above include_next from having any effect in the future. 
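The conditional guard that this comment motivates (added immediately below) boils down to the following pattern. This is a distilled sketch, not the real wrapper header; the guard macro name is hypothetical.

// Sketch of the wrapper's guard strategy: only arm the include guard once the
// real stdio.h has produced its normal definitions (signalled by _STDIO_H on
// glibc). A __need_FILE-style partial include from glibc must be allowed to
// include this wrapper again later.
#include_next <stdio.h>

#if !defined(__GLIBC__) || defined(_STDIO_H)
#ifndef __WRAPPER_GUARD_H__ /* hypothetical guard macro for this sketch */
#define __WRAPPER_GUARD_H__

/* GPU-side declarations that intercept the normal stdio.h symbols go here. */

#endif /* __WRAPPER_GUARD_H__ */
#endif /* !defined(__GLIBC__) || defined(_STDIO_H) */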
+#if !defined(__GLIBC__) || defined(_STDIO_H) + +#ifndef __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ +#define __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ + #if __has_include(<llvm-libc-decls/stdio.h>) #if defined(__HIP__) || defined(__CUDA__) @@ -50,3 +76,5 @@ #endif #endif // __CLANG_LLVM_LIBC_WRAPPERS_STDIO_H__ + +#endif diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index ea5d13deb114..42d55d09ea5a 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -984,7 +984,9 @@ static void inferFrameworkLink(Module *Mod) { assert(!Mod->isSubFramework() && "Can only infer linking for top-level frameworks"); - Mod->LinkLibraries.push_back(Module::LinkLibrary(Mod->Name, + StringRef FrameworkName(Mod->Name); + FrameworkName.consume_back("_Private"); + Mod->LinkLibraries.push_back(Module::LinkLibrary(FrameworkName.str(), /*IsFramework=*/true)); } diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index ed006f9d67de..b60ae293ef8c 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -3483,7 +3483,8 @@ void Parser::ParseDeclarationSpecifiers( case tok::coloncolon: // ::foo::bar // C++ scope specifier. Annotate and loop, or bail out on error. - if (TryAnnotateCXXScopeToken(EnteringContext)) { + if (getLangOpts().CPlusPlus && + TryAnnotateCXXScopeToken(EnteringContext)) { if (!DS.hasTypeSpecifier()) DS.SetTypeSpecError(); goto DoneWithDeclSpec; diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 910112ecae96..d97081da4200 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -2679,6 +2679,8 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, ParsedAttributes &AccessAttrs, const ParsedTemplateInfo &TemplateInfo, ParsingDeclRAIIObject *TemplateDiags) { + assert(getLangOpts().CPlusPlus && + "ParseCXXClassMemberDeclaration should only be called in C++ mode"); if (Tok.is(tok::at)) { if (getLangOpts().ObjC && NextToken().isObjCAtKeyword(tok::objc_defs)) Diag(Tok, diag::err_at_defs_cxx); diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index 67325f0a286a..c9224d3ae910 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -76,16 +76,27 @@ OpenACCClauseKind getOpenACCClauseKind(Token Tok) { if (Tok.is(tok::kw_auto)) return OpenACCClauseKind::Auto; + // default is a keyword, so make sure we parse it correctly. + if (Tok.is(tok::kw_default)) + return OpenACCClauseKind::Default; + + // if is also a keyword, make sure we parse it correctly. 
+ if (Tok.is(tok::kw_if)) + return OpenACCClauseKind::If; + if (!Tok.is(tok::identifier)) return OpenACCClauseKind::Invalid; return llvm::StringSwitch<OpenACCClauseKind>( Tok.getIdentifierInfo()->getName()) .Case("auto", OpenACCClauseKind::Auto) + .Case("default", OpenACCClauseKind::Default) .Case("finalize", OpenACCClauseKind::Finalize) + .Case("if", OpenACCClauseKind::If) .Case("if_present", OpenACCClauseKind::IfPresent) .Case("independent", OpenACCClauseKind::Independent) .Case("nohost", OpenACCClauseKind::NoHost) + .Case("self", OpenACCClauseKind::Self) .Case("seq", OpenACCClauseKind::Seq) .Case("vector", OpenACCClauseKind::Vector) .Case("worker", OpenACCClauseKind::Worker) @@ -106,6 +117,17 @@ OpenACCAtomicKind getOpenACCAtomicKind(Token Tok) { .Default(OpenACCAtomicKind::Invalid); } +OpenACCDefaultClauseKind getOpenACCDefaultClauseKind(Token Tok) { + if (!Tok.is(tok::identifier)) + return OpenACCDefaultClauseKind::Invalid; + + return llvm::StringSwitch<OpenACCDefaultClauseKind>( + Tok.getIdentifierInfo()->getName()) + .Case("none", OpenACCDefaultClauseKind::None) + .Case("present", OpenACCDefaultClauseKind::Present) + .Default(OpenACCDefaultClauseKind::Invalid); +} + enum class OpenACCSpecialTokenKind { ReadOnly, DevNum, @@ -176,6 +198,22 @@ bool isOpenACCDirectiveKind(OpenACCDirectiveKind Kind, Token Tok) { llvm_unreachable("Unknown 'Kind' Passed"); } +/// Used for cases where we expect an identifier-like token, but don't want to +/// give awkward error messages in cases where it is accidentially a keyword. +bool expectIdentifierOrKeyword(Parser &P) { + Token Tok = P.getCurToken(); + + if (Tok.is(tok::identifier)) + return false; + + if (!Tok.isAnnotation() && Tok.getIdentifierInfo() && + Tok.getIdentifierInfo()->isKeyword(P.getLangOpts())) + return false; + + P.Diag(P.getCurToken(), diag::err_expected) << tok::identifier; + return true; +} + OpenACCDirectiveKind ParseOpenACCEnterExitDataDirective(Parser &P, Token FirstTok, OpenACCDirectiveKindEx ExtDirKind) { @@ -291,14 +329,94 @@ OpenACCDirectiveKind ParseOpenACCDirectiveKind(Parser &P) { return DirKind; } +bool ClauseHasOptionalParens(OpenACCClauseKind Kind) { + return Kind == OpenACCClauseKind::Self; +} + +bool ClauseHasRequiredParens(OpenACCClauseKind Kind) { + return Kind == OpenACCClauseKind::Default || Kind == OpenACCClauseKind::If; +} + +ExprResult ParseOpenACCConditionalExpr(Parser &P) { + // FIXME: It isn't clear if the spec saying 'condition' means the same as + // it does in an if/while/etc (See ParseCXXCondition), however as it was + // written with Fortran/C in mind, we're going to assume it just means an + // 'expression evaluating to boolean'. + return P.getActions().CorrectDelayedTyposInExpr(P.ParseExpression()); +} + +bool ParseOpenACCClauseParams(Parser &P, OpenACCClauseKind Kind) { + BalancedDelimiterTracker Parens(P, tok::l_paren, + tok::annot_pragma_openacc_end); + + if (ClauseHasRequiredParens(Kind)) { + if (Parens.expectAndConsume()) { + // We are missing a paren, so assume that the person just forgot the + // parameter. Return 'false' so we try to continue on and parse the next + // clause. 
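Putting the new clause parsing together, the kind of input this accepts (and diagnoses) looks like the following. The source is illustrative only; it assumes a build with the OpenACC language option enabled, and the semantic handling of these directives is still a work in progress at this point.

// Illustrative OpenACC input for the clause parsing above.
void saxpy(int n, float a, float *x, float *y, bool use_gpu) {
  // 'default' and 'if' are C/C++ keywords, so the parser accepts them via
  // expectIdentifierOrKeyword(); both clauses require parentheses.
  #pragma acc parallel default(none) if(use_gpu)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];

  // 'self' takes optional parentheses; writing default(foo) instead of
  // default(none) or default(present) hits err_acc_invalid_default_clause_kind.
  #pragma acc serial self(use_gpu)
  for (int i = 0; i < n; ++i)
    y[i] += x[i];
}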
+ P.SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openacc_end, + Parser::StopBeforeMatch); + return false; + } + + switch (Kind) { + case OpenACCClauseKind::Default: { + Token DefKindTok = P.getCurToken(); + + if (expectIdentifierOrKeyword(P)) + break; + + P.ConsumeToken(); + + if (getOpenACCDefaultClauseKind(DefKindTok) == + OpenACCDefaultClauseKind::Invalid) + P.Diag(DefKindTok, diag::err_acc_invalid_default_clause_kind); + + break; + } + case OpenACCClauseKind::If: { + ExprResult CondExpr = ParseOpenACCConditionalExpr(P); + // An invalid expression can be just about anything, so just give up on + // this clause list. + if (CondExpr.isInvalid()) + return true; + break; + } + default: + llvm_unreachable("Not a required parens type?"); + } + + return Parens.consumeClose(); + } else if (ClauseHasOptionalParens(Kind)) { + if (!Parens.consumeOpen()) { + switch (Kind) { + case OpenACCClauseKind::Self: { + ExprResult CondExpr = ParseOpenACCConditionalExpr(P); + // An invalid expression can be just about anything, so just give up on + // this clause list. + if (CondExpr.isInvalid()) + return true; + break; + } + default: + llvm_unreachable("Not an optional parens type?"); + } + Parens.consumeClose(); + } + } + return false; +} + // The OpenACC Clause List is a comma or space-delimited list of clauses (see // the comment on ParseOpenACCClauseList). The concept of a 'clause' doesn't // really have its owner grammar and each individual one has its own definition. -// However, they all are named with a single-identifier (or auto!) token, -// followed in some cases by either braces or parens. +// However, they all are named with a single-identifier (or auto/default!) +// token, followed in some cases by either braces or parens. bool ParseOpenACCClause(Parser &P) { - if (!P.getCurToken().isOneOf(tok::identifier, tok::kw_auto)) - return P.Diag(P.getCurToken(), diag::err_expected) << tok::identifier; + // A number of clause names are actually keywords, so accept a keyword that + // can be converted to a name. + if (expectIdentifierOrKeyword(P)) + return true; OpenACCClauseKind Kind = getOpenACCClauseKind(P.getCurToken()); @@ -309,8 +427,7 @@ bool ParseOpenACCClause(Parser &P) { // Consume the clause name. P.ConsumeToken(); - // FIXME: For future clauses, we need to handle parens/etc below. - return false; + return ParseOpenACCClauseParams(P, Kind); } // Skip until we see the end of pragma token, but don't consume it. This is us diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 0947e8b0f526..9eb1df5f0240 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2226,8 +2226,8 @@ public: UnsafeBufferUsageReporter(Sema &S, bool SuggestSuggestions) : S(S), SuggestSuggestions(SuggestSuggestions) {} - void handleUnsafeOperation(const Stmt *Operation, - bool IsRelatedToDecl) override { + void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, + ASTContext &Ctx) override { SourceLocation Loc; SourceRange Range; unsigned MsgParam = 0; @@ -2261,6 +2261,18 @@ public: // note_unsafe_buffer_operation doesn't have this mode yet. 
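The ExplicitCastExpr branch added just below warns when the pointer returned by a member call (for example std::span<>::data()) is cast to a pointer to a wider element type. A sketch of the pattern it targets, assuming the analysis runs under -Wunsafe-buffer-usage:

#include <cstdint>
#include <span>

void use(std::span<int> S) {
  // Pointee grows from 4 to 8 bytes, so reads through P can run past the
  // span's extent; this is the MsgParam == 4 case added below.
  auto *P = reinterpret_cast<std::uint64_t *>(S.data());
  (void)P;

  // Same-size (or narrower) pointee is not flagged by this particular check.
  auto *Q = reinterpret_cast<std::uint32_t *>(S.data());
  (void)Q;
}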
assert(!IsRelatedToDecl && "Not implemented yet!"); MsgParam = 3; + } else if (const auto *ECE = dyn_cast<ExplicitCastExpr>(Operation)) { + QualType destType = ECE->getType(); + const uint64_t dSize = + Ctx.getTypeSize(destType.getTypePtr()->getPointeeType()); + if (const auto *CE = dyn_cast<CXXMemberCallExpr>(ECE->getSubExpr())) { + QualType srcType = CE->getType(); + const uint64_t sSize = + Ctx.getTypeSize(srcType.getTypePtr()->getPointeeType()); + if (sSize >= dSize) + return; + } + MsgParam = 4; } Loc = Operation->getBeginLoc(); Range = Operation->getSourceRange(); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index da0570b7b0f1..74f8f626fb16 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2998,7 +2998,12 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, llvm_unreachable("Invalid NeonTypeFlag!"); } -enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible }; +enum ArmStreamingType { + ArmNonStreaming, + ArmStreaming, + ArmStreamingCompatible, + ArmStreamingOrSVE2p1 +}; bool Sema::ParseSVEImmChecks( CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) { @@ -3156,6 +3161,16 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, const FunctionDecl *FD, ArmStreamingType BuiltinType) { ArmStreamingType FnType = getArmStreamingFnType(FD); + if (BuiltinType == ArmStreamingOrSVE2p1) { + // Check intrinsics that are available in [sve2p1 or sme/sme2]. + llvm::StringMap<bool> CallerFeatureMap; + S.Context.getFunctionFeatureMap(CallerFeatureMap, FD); + if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap)) + BuiltinType = ArmStreamingCompatible; + else + BuiltinType = ArmStreaming; + } + if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) { S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) << TheCall->getSourceRange() << "streaming"; @@ -16677,7 +16692,7 @@ class SequenceChecker : public ConstEvaluatedExprVisitor<SequenceChecker> { /// Have we issued a diagnostic for this object already? 
bool Diagnosed = false; - UsageInfo() = default; + UsageInfo(); }; using UsageInfoMap = llvm::SmallDenseMap<Object, UsageInfo, 16>; @@ -17436,6 +17451,8 @@ public: } }; +SequenceChecker::UsageInfo::UsageInfo() = default; + } // namespace void Sema::CheckUnsequencedOperations(const Expr *E) { @@ -18359,7 +18376,7 @@ static bool isSetterLikeSelector(Selector sel) { if (sel.isUnarySelector()) return false; StringRef str = sel.getNameForSlot(0); - while (!str.empty() && str.front() == '_') str = str.substr(1); + str = str.ltrim('_'); if (str.starts_with("set")) str = str.substr(3); else if (str.starts_with("add")) { diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 719c6aab74e0..acfc00f41254 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -771,10 +771,9 @@ namespace { }; } // namespace -static const Expr * -SubstituteConstraintExpression(Sema &S, - const Sema::TemplateCompareNewDeclInfo &DeclInfo, - const Expr *ConstrExpr) { +static const Expr *SubstituteConstraintExpressionWithoutSatisfaction( + Sema &S, const Sema::TemplateCompareNewDeclInfo &DeclInfo, + const Expr *ConstrExpr) { MultiLevelTemplateArgumentList MLTAL = S.getTemplateInstantiationArgs( DeclInfo.getDecl(), DeclInfo.getLexicalDeclContext(), /*Final=*/false, /*Innermost=*/nullptr, @@ -797,8 +796,8 @@ SubstituteConstraintExpression(Sema &S, std::optional<Sema::CXXThisScopeRAII> ThisScope; if (auto *RD = dyn_cast<CXXRecordDecl>(DeclInfo.getDeclContext())) ThisScope.emplace(S, const_cast<CXXRecordDecl *>(RD), Qualifiers()); - ExprResult SubstConstr = - S.SubstConstraintExpr(const_cast<clang::Expr *>(ConstrExpr), MLTAL); + ExprResult SubstConstr = S.SubstConstraintExprWithoutSatisfaction( + const_cast<clang::Expr *>(ConstrExpr), MLTAL); if (SFINAE.hasErrorOccurred() || !SubstConstr.isUsable()) return nullptr; return SubstConstr.get(); @@ -814,12 +813,14 @@ bool Sema::AreConstraintExpressionsEqual(const NamedDecl *Old, if (Old && !New.isInvalid() && !New.ContainsDecl(Old) && Old->getLexicalDeclContext() != New.getLexicalDeclContext()) { if (const Expr *SubstConstr = - SubstituteConstraintExpression(*this, Old, OldConstr)) + SubstituteConstraintExpressionWithoutSatisfaction(*this, Old, + OldConstr)) OldConstr = SubstConstr; else return false; if (const Expr *SubstConstr = - SubstituteConstraintExpression(*this, New, NewConstr)) + SubstituteConstraintExpressionWithoutSatisfaction(*this, New, + NewConstr)) NewConstr = SubstConstr; else return false; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index ffbe317d5599..8e46c4984d93 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -9900,15 +9900,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, // Match up the template parameter lists with the scope specifier, then // determine whether we have a template or a template specialization. bool Invalid = false; + TemplateIdAnnotation *TemplateId = + D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId + ? D.getName().TemplateId + : nullptr; TemplateParameterList *TemplateParams = MatchTemplateParametersToScopeSpecifier( D.getDeclSpec().getBeginLoc(), D.getIdentifierLoc(), - D.getCXXScopeSpec(), - D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId - ? 
D.getName().TemplateId - : nullptr, - TemplateParamLists, isFriend, isMemberSpecialization, - Invalid); + D.getCXXScopeSpec(), TemplateId, TemplateParamLists, isFriend, + isMemberSpecialization, Invalid); if (TemplateParams) { // Check that we can declare a template here. if (CheckTemplateDeclScope(S, TemplateParams)) @@ -9921,6 +9921,11 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (Name.getNameKind() == DeclarationName::CXXDestructorName) { Diag(NewFD->getLocation(), diag::err_destructor_template); NewFD->setInvalidDecl(); + // Function template with explicit template arguments. + } else if (TemplateId) { + Diag(D.getIdentifierLoc(), diag::err_function_template_partial_spec) + << SourceRange(TemplateId->LAngleLoc, TemplateId->RAngleLoc); + NewFD->setInvalidDecl(); } // If we're adding a template to a dependent context, we may need to @@ -9973,6 +9978,11 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, << FixItHint::CreateRemoval(RemoveRange) << FixItHint::CreateInsertion(InsertLoc, "<>"); Invalid = true; + + // Recover by faking up an empty template argument list. + HasExplicitTemplateArgs = true; + TemplateArgs.setLAngleLoc(InsertLoc); + TemplateArgs.setRAngleLoc(InsertLoc); } } } else { @@ -9986,6 +9996,33 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (TemplateParamLists.size() > 0) // For source fidelity, store all the template param lists. NewFD->setTemplateParameterListsInfo(Context, TemplateParamLists); + + // "friend void foo<>(int);" is an implicit specialization decl. + if (isFriend && TemplateId) + isFunctionTemplateSpecialization = true; + } + + // If this is a function template specialization and the unqualified-id of + // the declarator-id is a template-id, convert the template argument list + // into our AST format and check for unexpanded packs. + if (isFunctionTemplateSpecialization && TemplateId) { + HasExplicitTemplateArgs = true; + + TemplateArgs.setLAngleLoc(TemplateId->LAngleLoc); + TemplateArgs.setRAngleLoc(TemplateId->RAngleLoc); + ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(), + TemplateId->NumArgs); + translateTemplateArguments(TemplateArgsPtr, TemplateArgs); + + // FIXME: Should we check for unexpanded packs if this was an (invalid) + // declaration of a function template partial specialization? Should we + // consider the unexpanded pack context to be a partial specialization? + for (const TemplateArgumentLoc &ArgLoc : TemplateArgs.arguments()) { + if (DiagnoseUnexpandedParameterPack( + ArgLoc, isFriend ? UPPC_FriendDeclaration + : UPPC_ExplicitSpecialization)) + NewFD->setInvalidDecl(); + } } if (Invalid) { @@ -10438,46 +10475,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, diag::ext_operator_new_delete_declared_inline) << NewFD->getDeclName(); - // If the declarator is a template-id, translate the parser's template - // argument list into our AST format. 
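The template-id handling above rejects what would amount to a partial specialization of a function template, while a bare template-id in a friend declaration is treated as an implicit specialization. A short illustration:

// Illustrative declarations for the template-id handling above.
template <class T> void f(T);

template <> void f<int>(int);      // OK: explicit specialization.

// Ill-formed: function templates cannot be partially specialized
// (the err_function_template_partial_spec case above).
// template <class T> void f<T *>(T *);

struct S {
  // Parsed as an implicit specialization declaration, i.e. as if the user
  // had written a friend declaration of the specialization f<>(int).
  friend void f<>(int);
};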
- if (D.getName().getKind() == UnqualifiedIdKind::IK_TemplateId) { - TemplateIdAnnotation *TemplateId = D.getName().TemplateId; - TemplateArgs.setLAngleLoc(TemplateId->LAngleLoc); - TemplateArgs.setRAngleLoc(TemplateId->RAngleLoc); - ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(), - TemplateId->NumArgs); - translateTemplateArguments(TemplateArgsPtr, - TemplateArgs); - - HasExplicitTemplateArgs = true; - - if (NewFD->isInvalidDecl()) { - HasExplicitTemplateArgs = false; - } else if (FunctionTemplate) { - // Function template with explicit template arguments. - Diag(D.getIdentifierLoc(), diag::err_function_template_partial_spec) - << SourceRange(TemplateId->LAngleLoc, TemplateId->RAngleLoc); - - HasExplicitTemplateArgs = false; - } else if (isFriend) { - // "friend void foo<>(int);" is an implicit specialization decl. - isFunctionTemplateSpecialization = true; - } else { - assert(isFunctionTemplateSpecialization && - "should have a 'template<>' for this decl"); - } - } else if (isFriend && isFunctionTemplateSpecialization) { - // This combination is only possible in a recovery case; the user - // wrote something like: - // template <> friend void foo(int); - // which we're recovering from as if the user had written: - // friend void foo<>(int); - // Go ahead and fake up a template id. - HasExplicitTemplateArgs = true; - TemplateArgs.setLAngleLoc(D.getIdentifierLoc()); - TemplateArgs.setRAngleLoc(D.getIdentifierLoc()); - } - // We do not add HD attributes to specializations here because // they may have different constexpr-ness compared to their // templates and, after maybeAddCUDAHostDeviceAttrs() is applied, @@ -15845,8 +15842,6 @@ static void diagnoseImplicitlyRetainedSelf(Sema &S) { } void Sema::CheckCoroutineWrapper(FunctionDecl *FD) { - if (!FD) - return; RecordDecl *RD = FD->getReturnType()->getAsRecordDecl(); if (!RD || !RD->getUnderlyingDecl()->hasAttr<CoroReturnTypeAttr>()) return; @@ -15869,7 +15864,8 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, sema::AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); sema::AnalysisBasedWarnings::Policy *ActivePolicy = nullptr; - if (getLangOpts().Coroutines) { + // If we skip function body, we can't tell if a function is a coroutine. + if (getLangOpts().Coroutines && FD && !FD->hasSkippedBody()) { if (FSI->isCoroutine()) CheckCompletedCoroutineBody(FD, Body); else diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 4a385a396fa6..d059b406ef86 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3369,6 +3369,22 @@ static void handleSectionAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } } +static void handleCodeModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + StringRef Str; + SourceLocation LiteralLoc; + // Check that it is a string. + if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc)) + return; + + llvm::CodeModel::Model CM; + if (!CodeModelAttr::ConvertStrToModel(Str, CM)) { + S.Diag(LiteralLoc, diag::err_attr_codemodel_arg) << Str; + return; + } + + D->addAttr(::new (S.Context) CodeModelAttr(S.Context, AL, CM)); +} + // This is used for `__declspec(code_seg("segname"))` on a decl. // `#pragma code_seg("segname")` uses checkSectionName() instead. 
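handleCodeModelAttr above wires a per-declaration code-model attribute into Sema. Rough usage is sketched below; note that the attribute spelling ("model") and the argument string are assumptions here, since the authoritative set is whatever CodeModelAttr::ConvertStrToModel accepts per the Attr.td definition, and an unrecognized string is rejected with err_attr_codemodel_arg.

// Assumed usage of the new code-model attribute handled above (spelling and
// value are illustrative assumptions, not confirmed by this hunk).
int big_table[1024] __attribute__((model("extreme")));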
static bool checkCodeSegName(Sema &S, SourceLocation LiteralLoc, @@ -9253,6 +9269,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_Section: handleSectionAttr(S, D, AL); break; + case ParsedAttr::AT_CodeModel: + handleCodeModelAttr(S, D, AL); + break; case ParsedAttr::AT_RandomizeLayout: handleRandomizeLayoutAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 960f513d1111..60ad035570c8 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -8691,10 +8691,10 @@ ExprResult Sema::ActOnParenListExpr(SourceLocation L, /// Emit a specialized diagnostic when one expression is a null pointer /// constant and the other is not a pointer. Returns true if a diagnostic is /// emitted. -bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr, +bool Sema::DiagnoseConditionalForNull(const Expr *LHSExpr, const Expr *RHSExpr, SourceLocation QuestionLoc) { - Expr *NullExpr = LHSExpr; - Expr *NonPointerExpr = RHSExpr; + const Expr *NullExpr = LHSExpr; + const Expr *NonPointerExpr = RHSExpr; Expr::NullPointerConstantKind NullKind = NullExpr->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull); @@ -8730,7 +8730,8 @@ bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr, } /// Return false if the condition expression is valid, true otherwise. -static bool checkCondition(Sema &S, Expr *Cond, SourceLocation QuestionLoc) { +static bool checkCondition(Sema &S, const Expr *Cond, + SourceLocation QuestionLoc) { QualType CondTy = Cond->getType(); // OpenCL v1.1 s6.3.i says the condition cannot be a floating point type. @@ -9542,28 +9543,27 @@ static bool IsArithmeticOp(BinaryOperatorKind Opc) { /// expression, either using a built-in or overloaded operator, /// and sets *OpCode to the opcode and *RHSExprs to the right-hand side /// expression. -static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, - Expr **RHSExprs) { +static bool IsArithmeticBinaryExpr(const Expr *E, BinaryOperatorKind *Opcode, + const Expr **RHSExprs) { // Don't strip parenthesis: we should not warn if E is in parenthesis. E = E->IgnoreImpCasts(); E = E->IgnoreConversionOperatorSingleStep(); E = E->IgnoreImpCasts(); - if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) { + if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) { E = MTE->getSubExpr(); E = E->IgnoreImpCasts(); } // Built-in binary operator. - if (BinaryOperator *OP = dyn_cast<BinaryOperator>(E)) { - if (IsArithmeticOp(OP->getOpcode())) { - *Opcode = OP->getOpcode(); - *RHSExprs = OP->getRHS(); - return true; - } + if (const auto *OP = dyn_cast<BinaryOperator>(E); + OP && IsArithmeticOp(OP->getOpcode())) { + *Opcode = OP->getOpcode(); + *RHSExprs = OP->getRHS(); + return true; } // Overloaded operator. - if (CXXOperatorCallExpr *Call = dyn_cast<CXXOperatorCallExpr>(E)) { + if (const auto *Call = dyn_cast<CXXOperatorCallExpr>(E)) { if (Call->getNumArgs() != 2) return false; @@ -9588,14 +9588,14 @@ static bool IsArithmeticBinaryExpr(Expr *E, BinaryOperatorKind *Opcode, /// ExprLooksBoolean - Returns true if E looks boolean, i.e. it has boolean type /// or is a logical expression such as (x==y) which has int type, but is /// commonly interpreted as boolean. 
-static bool ExprLooksBoolean(Expr *E) { +static bool ExprLooksBoolean(const Expr *E) { E = E->IgnoreParenImpCasts(); if (E->getType()->isBooleanType()) return true; - if (BinaryOperator *OP = dyn_cast<BinaryOperator>(E)) + if (const auto *OP = dyn_cast<BinaryOperator>(E)) return OP->isComparisonOp() || OP->isLogicalOp(); - if (UnaryOperator *OP = dyn_cast<UnaryOperator>(E)) + if (const auto *OP = dyn_cast<UnaryOperator>(E)) return OP->getOpcode() == UO_LNot; if (E->getType()->isPointerType()) return true; @@ -9609,13 +9609,11 @@ static bool ExprLooksBoolean(Expr *E) { /// and binary operator are mixed in a way that suggests the programmer assumed /// the conditional operator has higher precedence, for example: /// "int x = a + someBinaryCondition ? 1 : 2". -static void DiagnoseConditionalPrecedence(Sema &Self, - SourceLocation OpLoc, - Expr *Condition, - Expr *LHSExpr, - Expr *RHSExpr) { +static void DiagnoseConditionalPrecedence(Sema &Self, SourceLocation OpLoc, + Expr *Condition, const Expr *LHSExpr, + const Expr *RHSExpr) { BinaryOperatorKind CondOpcode; - Expr *CondRHS; + const Expr *CondRHS; if (!IsArithmeticBinaryExpr(Condition, &CondOpcode, &CondRHS)) return; diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index cc9db5ded114..408ee5f77580 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -7589,7 +7589,8 @@ static void visitLifetimeBoundArguments(IndirectLocalPath &Path, Expr *Call, bool CheckCoroCall = false; if (const auto *RD = Callee->getReturnType()->getAsRecordDecl()) { CheckCoroCall = RD->hasAttr<CoroLifetimeBoundAttr>() && - RD->hasAttr<CoroReturnTypeAttr>(); + RD->hasAttr<CoroReturnTypeAttr>() && + !Callee->hasAttr<CoroDisableLifetimeBoundAttr>(); } for (unsigned I = 0, N = std::min<unsigned>(Callee->getNumParams(), Args.size()); @@ -10376,11 +10377,6 @@ void InitializationSequence::dump() const { dump(llvm::errs()); } -static bool NarrowingErrs(const LangOptions &L) { - return L.CPlusPlus11 && - (!L.MicrosoftExt || L.isCompatibleWithMSVC(LangOptions::MSVC2015)); -} - static void DiagnoseNarrowingInInitList(Sema &S, const ImplicitConversionSequence &ICS, QualType PreNarrowingType, @@ -10401,6 +10397,19 @@ static void DiagnoseNarrowingInInitList(Sema &S, return; } + auto MakeDiag = [&](bool IsConstRef, unsigned DefaultDiagID, + unsigned ConstRefDiagID, unsigned WarnDiagID) { + unsigned DiagID; + auto &L = S.getLangOpts(); + if (L.CPlusPlus11 && + (!L.MicrosoftExt || L.isCompatibleWithMSVC(LangOptions::MSVC2015))) + DiagID = IsConstRef ? ConstRefDiagID : DefaultDiagID; + else + DiagID = WarnDiagID; + return S.Diag(PostInit->getBeginLoc(), DiagID) + << PostInit->getSourceRange(); + }; + // C++11 [dcl.init.list]p7: Check whether this is a narrowing conversion. APValue ConstantValue; QualType ConstantType; @@ -10416,13 +10425,9 @@ static void DiagnoseNarrowingInInitList(Sema &S, // narrowing conversion even if the value is a constant and can be // represented exactly as an integer. QualType T = EntityType.getNonReferenceType(); - S.Diag(PostInit->getBeginLoc(), - NarrowingErrs(S.getLangOpts()) - ? (T == EntityType - ? 
diag::ext_init_list_type_narrowing - : diag::ext_init_list_type_narrowing_const_reference) - : diag::warn_init_list_type_narrowing) - << PostInit->getSourceRange() + MakeDiag(T != EntityType, diag::ext_init_list_type_narrowing, + diag::ext_init_list_type_narrowing_const_reference, + diag::warn_init_list_type_narrowing) << PreNarrowingType.getLocalUnqualifiedType() << T.getLocalUnqualifiedType(); break; @@ -10430,14 +10435,10 @@ static void DiagnoseNarrowingInInitList(Sema &S, case NK_Constant_Narrowing: { // A constant value was narrowed. - QualType T = EntityType.getNonReferenceType(); - S.Diag(PostInit->getBeginLoc(), - NarrowingErrs(S.getLangOpts()) - ? (T == EntityType - ? diag::ext_init_list_constant_narrowing - : diag::ext_init_list_constant_narrowing_const_reference) - : diag::warn_init_list_constant_narrowing) - << PostInit->getSourceRange() + MakeDiag(EntityType.getNonReferenceType() != EntityType, + diag::ext_init_list_constant_narrowing, + diag::ext_init_list_constant_narrowing_const_reference, + diag::warn_init_list_constant_narrowing) << ConstantValue.getAsString(S.getASTContext(), ConstantType) << EntityType.getNonReferenceType().getLocalUnqualifiedType(); break; @@ -10445,14 +10446,10 @@ static void DiagnoseNarrowingInInitList(Sema &S, case NK_Variable_Narrowing: { // A variable's value may have been narrowed. - QualType T = EntityType.getNonReferenceType(); - S.Diag(PostInit->getBeginLoc(), - NarrowingErrs(S.getLangOpts()) - ? (T == EntityType - ? diag::ext_init_list_variable_narrowing - : diag::ext_init_list_variable_narrowing_const_reference) - : diag::warn_init_list_variable_narrowing) - << PostInit->getSourceRange() + MakeDiag(EntityType.getNonReferenceType() != EntityType, + diag::ext_init_list_variable_narrowing, + diag::ext_init_list_variable_narrowing_const_reference, + diag::warn_init_list_variable_narrowing) << PreNarrowingType.getLocalUnqualifiedType() << EntityType.getNonReferenceType().getLocalUnqualifiedType(); break; diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index f34d2959dc61..365032c96421 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5072,6 +5072,18 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack, CurrentRegion != OMPD_cancellation_point && CurrentRegion != OMPD_cancel && CurrentRegion != OMPD_scan) return false; + // Checks needed for mapping "loop" construct. Please check mapLoopConstruct + // for a detailed explanation + if (SemaRef.LangOpts.OpenMP >= 50 && CurrentRegion == OMPD_loop && + (BindKind == OMPC_BIND_parallel || BindKind == OMPC_BIND_teams) && + (isOpenMPWorksharingDirective(ParentRegion) || + ParentRegion == OMPD_loop)) { + int ErrorMsgNumber = (BindKind == OMPC_BIND_parallel) ? 
1 : 4; + SemaRef.Diag(StartLoc, diag::err_omp_prohibited_region) + << true << getOpenMPDirectiveName(ParentRegion) << ErrorMsgNumber + << getOpenMPDirectiveName(CurrentRegion); + return true; + } if (CurrentRegion == OMPD_cancellation_point || CurrentRegion == OMPD_cancel) { // OpenMP [2.16, Nesting of Regions] @@ -6124,21 +6136,25 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack, bool Sema::mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind, ArrayRef<OMPClause *> Clauses, - OpenMPBindClauseKind BindKind, + OpenMPBindClauseKind &BindKind, OpenMPDirectiveKind &Kind, - OpenMPDirectiveKind &PrevMappedDirective) { + OpenMPDirectiveKind &PrevMappedDirective, + SourceLocation StartLoc, SourceLocation EndLoc, + const DeclarationNameInfo &DirName, + OpenMPDirectiveKind CancelRegion) { bool UseClausesWithoutBind = false; // Restricting to "#pragma omp loop bind" if (getLangOpts().OpenMP >= 50 && Kind == OMPD_loop) { + + const OpenMPDirectiveKind ParentDirective = DSAStack->getParentDirective(); + if (BindKind == OMPC_BIND_unknown) { // Setting the enclosing teams or parallel construct for the loop // directive without bind clause. BindKind = OMPC_BIND_thread; // Default bind(thread) if binding is unknown - const OpenMPDirectiveKind ParentDirective = - DSAStack->getParentDirective(); if (ParentDirective == OMPD_unknown) { Diag(DSAStack->getDefaultDSALocation(), diag::err_omp_bind_required_on_loop); @@ -6150,9 +6166,10 @@ bool Sema::mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind, BindKind = OMPC_BIND_teams; } } else { - // bind clause is present, so we should set flag indicating to only - // use the clauses that aren't the bind clause for the new directive that - // loop is lowered to. + // bind clause is present in loop directive. When the loop directive is + // changed to a new directive the bind clause is not used. So, we should + // set flag indicating to only use the clauses that aren't the + // bind clause. UseClausesWithoutBind = true; } @@ -6213,26 +6230,35 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( OpenMPDirectiveKind PrevMappedDirective) { StmtResult Res = StmtError(); OpenMPBindClauseKind BindKind = OMPC_BIND_unknown; + llvm::SmallVector<OMPClause *> ClausesWithoutBind; + bool UseClausesWithoutBind = false; + if (const OMPBindClause *BC = OMPExecutableDirective::getSingleClause<OMPBindClause>(Clauses)) BindKind = BC->getBindKind(); + + // Variable used to note down the DirectiveKind because mapLoopConstruct may + // change "Kind" variable, due to mapping of "omp loop" to other directives. + OpenMPDirectiveKind DK = Kind; + if (Kind == OMPD_loop || PrevMappedDirective == OMPD_loop) { + UseClausesWithoutBind = mapLoopConstruct( + ClausesWithoutBind, Clauses, BindKind, Kind, PrevMappedDirective, + StartLoc, EndLoc, DirName, CancelRegion); + DK = OMPD_loop; + } + // First check CancelRegion which is then used in checkNestingOfRegions. if (checkCancelRegion(*this, Kind, CancelRegion, StartLoc) || - checkNestingOfRegions(*this, DSAStack, Kind, DirName, CancelRegion, - BindKind, StartLoc)) + checkNestingOfRegions(*this, DSAStack, DK, DirName, CancelRegion, + BindKind, StartLoc)) { return StmtError(); + } // Report affected OpenMP target offloading behavior when in HIP lang-mode. 
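The nesting rule added above rejects a loop construct with bind(parallel) or bind(teams) that is closely nested inside a worksharing (or another loop) region. A sketch, assuming OpenMP 5.0 or later with -fopenmp:

// Sketch of the prohibited nesting diagnosed by the check above.
void work(int n, float *x) {
  #pragma omp parallel
  {
    #pragma omp for
    for (int i = 0; i < n; ++i) {
      // Diagnosed: a bind(parallel) loop region may not be closely nested
      // inside the enclosing worksharing 'for' region (err_omp_prohibited_region).
      #pragma omp loop bind(parallel)
      for (int j = 0; j < n; ++j)
        x[i] += j;
    }

    // A 'loop' without a bind clause instead takes its binding from the
    // enclosing region via mapLoopConstruct above, so it goes down the
    // default-bind path rather than this diagnostic.
    #pragma omp loop
    for (int i = 0; i < n; ++i)
      x[i] *= 2.0f;
  }
}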
if (getLangOpts().HIP && (isOpenMPTargetExecutionDirective(Kind) || isOpenMPTargetDataManagementDirective(Kind))) Diag(StartLoc, diag::warn_hip_omp_target_directives); - llvm::SmallVector<OMPClause *> ClausesWithoutBind; - bool UseClausesWithoutBind = false; - - UseClausesWithoutBind = mapLoopConstruct(ClausesWithoutBind, Clauses, - BindKind, Kind, PrevMappedDirective); - llvm::SmallVector<OMPClause *, 8> ClausesWithImplicit; VarsWithInheritedDSAType VarsWithInheritedDSA; bool ErrorFound = false; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 5026e1d603e5..e6c267bb79e6 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1259,6 +1259,43 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, if ((OldTemplate == nullptr) != (NewTemplate == nullptr)) return true; + if (NewTemplate) { + // C++ [temp.over.link]p4: + // The signature of a function template consists of its function + // signature, its return type and its template parameter list. The names + // of the template parameters are significant only for establishing the + // relationship between the template parameters and the rest of the + // signature. + // + // We check the return type and template parameter lists for function + // templates first; the remaining checks follow. + bool SameTemplateParameterList = SemaRef.TemplateParameterListsAreEqual( + NewTemplate, NewTemplate->getTemplateParameters(), OldTemplate, + OldTemplate->getTemplateParameters(), false, Sema::TPL_TemplateMatch); + bool SameReturnType = SemaRef.Context.hasSameType( + Old->getDeclaredReturnType(), New->getDeclaredReturnType()); + // FIXME(GH58571): Match template parameter list even for non-constrained + // template heads. This currently ensures that the code prior to C++20 is + // not newly broken. + bool ConstraintsInTemplateHead = + NewTemplate->getTemplateParameters()->hasAssociatedConstraints() || + OldTemplate->getTemplateParameters()->hasAssociatedConstraints(); + // C++ [namespace.udecl]p11: + // The set of declarations named by a using-declarator that inhabits a + // class C does not include member functions and member function + // templates of a base class that "correspond" to (and thus would + // conflict with) a declaration of a function or function template in + // C. + // Comparing return types is not required for the "correspond" check to + // decide whether a member introduced by a shadow declaration is hidden. + if (UseMemberUsingDeclRules && ConstraintsInTemplateHead && + !SameTemplateParameterList) + return true; + if (!UseMemberUsingDeclRules && + (!SameTemplateParameterList || !SameReturnType)) + return true; + } + // Is the function New an overload of the function Old? QualType OldQType = SemaRef.Context.getCanonicalType(Old->getType()); QualType NewQType = SemaRef.Context.getCanonicalType(New->getType()); @@ -1410,43 +1447,6 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, } } - if (NewTemplate) { - // C++ [temp.over.link]p4: - // The signature of a function template consists of its function - // signature, its return type and its template parameter list. The names - // of the template parameters are significant only for establishing the - // relationship between the template parameters and the rest of the - // signature. - // - // We check the return type and template parameter lists for function - // templates first; the remaining checks follow. 
- bool SameTemplateParameterList = SemaRef.TemplateParameterListsAreEqual( - NewTemplate, NewTemplate->getTemplateParameters(), OldTemplate, - OldTemplate->getTemplateParameters(), false, Sema::TPL_TemplateMatch); - bool SameReturnType = SemaRef.Context.hasSameType( - Old->getDeclaredReturnType(), New->getDeclaredReturnType()); - // FIXME(GH58571): Match template parameter list even for non-constrained - // template heads. This currently ensures that the code prior to C++20 is - // not newly broken. - bool ConstraintsInTemplateHead = - NewTemplate->getTemplateParameters()->hasAssociatedConstraints() || - OldTemplate->getTemplateParameters()->hasAssociatedConstraints(); - // C++ [namespace.udecl]p11: - // The set of declarations named by a using-declarator that inhabits a - // class C does not include member functions and member function - // templates of a base class that "correspond" to (and thus would - // conflict with) a declaration of a function or function template in - // C. - // Comparing return types is not required for the "correspond" check to - // decide whether a member introduced by a shadow declaration is hidden. - if (UseMemberUsingDeclRules && ConstraintsInTemplateHead && - !SameTemplateParameterList) - return true; - if (!UseMemberUsingDeclRules && - (!SameTemplateParameterList || !SameReturnType)) - return true; - } - if (!UseOverrideRules) { Expr *NewRC = New->getTrailingRequiresClause(), *OldRC = Old->getTrailingRequiresClause(); @@ -13995,6 +13995,22 @@ ExprResult Sema::BuildOverloadedCallExpr(Scope *S, Expr *Fn, OverloadingResult OverloadResult = CandidateSet.BestViableFunction(*this, Fn->getBeginLoc(), Best); + // Model the case with a call to a templated function whose definition + // encloses the call and whose return type contains a placeholder type as if + // the UnresolvedLookupExpr was type-dependent. + if (OverloadResult == OR_Success) { + const FunctionDecl *FDecl = Best->Function; + if (FDecl && FDecl->isTemplateInstantiation() && + FDecl->getReturnType()->isUndeducedType()) { + if (const auto *TP = + FDecl->getTemplateInstantiationPattern(/*ForDefinition=*/false); + TP && TP->willHaveBody()) { + return CallExpr::Create(Context, Fn, Args, Context.DependentTy, + VK_PRValue, RParenLoc, CurFPFeatureOverrides()); + } + } + } + return FinishOverloadedCallExpr(*this, S, Fn, ULE, LParenLoc, Args, RParenLoc, ExecConfig, &CandidateSet, &Best, OverloadResult, AllowTypoCorrection); diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 699e0985e595..015b0abaf0e5 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4737,6 +4737,7 @@ namespace { QualType Replacement; bool ReplacementIsPack; bool UseTypeSugar; + using inherited = TreeTransform<SubstituteDeducedTypeTransform>; public: SubstituteDeducedTypeTransform(Sema &SemaRef, DependentAuto DA) @@ -4797,6 +4798,16 @@ namespace { // Lambdas never need to be transformed. return E; } + bool TransformExceptionSpec(SourceLocation Loc, + FunctionProtoType::ExceptionSpecInfo &ESI, + SmallVectorImpl<QualType> &Exceptions, + bool &Changed) { + if (ESI.Type == EST_Uninstantiated) { + ESI.instantiate(); + Changed = true; + } + return inherited::TransformExceptionSpec(Loc, ESI, Exceptions, Changed); + } QualType Apply(TypeLoc TL) { // Create some scratch storage for the transformed type locations. 
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index df6b40999e64..7f20413c104e 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -35,7 +35,6 @@ #include "clang/Sema/Template.h" #include "clang/Sema/TemplateDeduction.h" #include "clang/Sema/TemplateInstCallback.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" @@ -345,15 +344,26 @@ MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs( using namespace TemplateInstArgsHelpers; const Decl *CurDecl = ND; + + if (!CurDecl) + CurDecl = Decl::castFromDeclContext(DC); + if (Innermost) { Result.addOuterTemplateArguments(const_cast<NamedDecl *>(ND), Innermost->asArray(), Final); - CurDecl = Response::UseNextDecl(ND).NextDecl; + // Populate placeholder template arguments for TemplateTemplateParmDecls. + // This is essential for the case e.g. + // + // template <class> concept Concept = false; + // template <template <Concept C> class T> void foo(T<int>) + // + // where parameter C has a depth of 1 but the substituting argument `int` + // has a depth of 0. + if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(CurDecl)) + HandleDefaultTempArgIntoTempTempParam(TTP, Result); + CurDecl = Response::UseNextDecl(CurDecl).NextDecl; } - if (!ND) - CurDecl = Decl::castFromDeclContext(DC); - while (!CurDecl->isFileContextDecl()) { Response R; if (const auto *VarTemplSpec = @@ -381,10 +391,8 @@ MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs( R = Response::ChangeDecl(CTD->getLexicalDeclContext()); } else if (!isa<DeclContext>(CurDecl)) { R = Response::DontClearRelativeToPrimaryNextDecl(CurDecl); - if (CurDecl->getDeclContext()->isTranslationUnit()) { - if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(CurDecl)) { - R = HandleDefaultTempArgIntoTempTempParam(TTP, Result); - } + if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(CurDecl)) { + R = HandleDefaultTempArgIntoTempTempParam(TTP, Result); } } else { R = HandleGenericDeclContext(CurDecl); @@ -1142,8 +1150,7 @@ std::optional<TemplateDeductionInfo *> Sema::isSFINAEContext() const { case CodeSynthesisContext::DeducedTemplateArgumentSubstitution: // We're either substituting explicitly-specified template arguments, // deduced template arguments. SFINAE applies unless we are in a lambda - // expression, see [temp.deduct]p9. - [[fallthrough]]; + // body, see [temp.deduct]p9. case CodeSynthesisContext::ConstraintSubstitution: case CodeSynthesisContext::RequirementInstantiation: case CodeSynthesisContext::RequirementParameterInstantiation: @@ -1190,6 +1197,7 @@ namespace { const MultiLevelTemplateArgumentList &TemplateArgs; SourceLocation Loc; DeclarationName Entity; + // Whether to evaluate the C++20 constraints or simply substitute into them. 
bool EvaluateConstraints = true; public: @@ -1444,13 +1452,6 @@ namespace { LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true); Sema::ConstraintEvalRAII<TemplateInstantiator> RAII(*this); - Sema::CodeSynthesisContext C; - C.Kind = clang::Sema::CodeSynthesisContext::LambdaExpressionSubstitution; - C.PointOfInstantiation = E->getBeginLoc(); - SemaRef.pushCodeSynthesisContext(C); - auto PopCtx = - llvm::make_scope_exit([this] { SemaRef.popCodeSynthesisContext(); }); - ExprResult Result = inherited::TransformLambdaExpr(E); if (Result.isInvalid()) return Result; @@ -1478,6 +1479,23 @@ namespace { return Result; } + StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body) { + // Currently, we instantiate the body when instantiating the lambda + // expression. However, `EvaluateConstraints` is disabled during the + // instantiation of the lambda expression, causing the instantiation + // failure of the return type requirement in the body. If p0588r1 is fully + // implemented, the body will be lazily instantiated, and this problem + // will not occur. Here, `EvaluateConstraints` is temporarily set to + // `true` to temporarily fix this issue. + // FIXME: This temporary fix can be removed after fully implementing + // p0588r1. + bool Prev = EvaluateConstraints; + EvaluateConstraints = true; + StmtResult Stmt = inherited::TransformLambdaBody(E, Body); + EvaluateConstraints = Prev; + return Stmt; + } + ExprResult TransformRequiresExpr(RequiresExpr *E) { LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true); ExprResult TransReq = inherited::TransformRequiresExpr(E); @@ -1630,9 +1648,7 @@ bool TemplateInstantiator::TransformExceptionSpec( SourceLocation Loc, FunctionProtoType::ExceptionSpecInfo &ESI, SmallVectorImpl<QualType> &Exceptions, bool &Changed) { if (ESI.Type == EST_Uninstantiated) { - ESI.NoexceptExpr = cast<FunctionProtoType>(ESI.SourceTemplate->getType()) - ->getNoexceptExpr(); - ESI.Type = EST_DependentNoexcept; + ESI.instantiate(); Changed = true; } return inherited::TransformExceptionSpec(Loc, ESI, Exceptions, Changed); @@ -2499,6 +2515,17 @@ TemplateInstantiator::TransformNestedRequirement( Req->getConstraintExpr()->getBeginLoc(), Req, Sema::InstantiatingTemplate::ConstraintsCheck{}, Req->getConstraintExpr()->getSourceRange()); + if (!getEvaluateConstraints()) { + ExprResult TransConstraint = TransformExpr(Req->getConstraintExpr()); + if (TransConstraint.isInvalid() || !TransConstraint.get()) + return nullptr; + if (TransConstraint.get()->isInstantiationDependent()) + return new (SemaRef.Context) + concepts::NestedRequirement(TransConstraint.get()); + ConstraintSatisfaction Satisfaction; + return new (SemaRef.Context) concepts::NestedRequirement( + SemaRef.Context, TransConstraint.get(), Satisfaction); + } ExprResult TransConstraint; ConstraintSatisfaction Satisfaction; @@ -4093,13 +4120,19 @@ Sema::SubstExpr(Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs) { ExprResult Sema::SubstConstraintExpr(Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs) { + // FIXME: should call SubstExpr directly if this function is equivalent or + // should it be different? + return SubstExpr(E, TemplateArgs); +} + +ExprResult Sema::SubstConstraintExprWithoutSatisfaction( + Expr *E, const MultiLevelTemplateArgumentList &TemplateArgs) { if (!E) return E; - // This is where we need to make sure we 'know' constraint checking needs to - // happen. 
TemplateInstantiator Instantiator(*this, TemplateArgs, SourceLocation(), DeclarationName()); + Instantiator.setEvaluateConstraints(false); return Instantiator.TransformExpr(E); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 7df5bf0cb713..c8c5a51bf9f9 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -674,6 +674,10 @@ public: Qualifiers ThisTypeQuals, Fn TransformExceptionSpec); + template <typename Fn> + QualType TransformAttributedType(TypeLocBuilder &TLB, AttributedTypeLoc TL, + Fn TransformModifiedType); + bool TransformExceptionSpec(SourceLocation Loc, FunctionProtoType::ExceptionSpecInfo &ESI, SmallVectorImpl<QualType> &Exceptions, @@ -7050,12 +7054,12 @@ TreeTransform<Derived>::TransformElaboratedType(TypeLocBuilder &TLB, return Result; } -template<typename Derived> +template <typename Derived> +template <typename Fn> QualType TreeTransform<Derived>::TransformAttributedType( - TypeLocBuilder &TLB, - AttributedTypeLoc TL) { + TypeLocBuilder &TLB, AttributedTypeLoc TL, Fn TransformModifiedTypeFn) { const AttributedType *oldType = TL.getTypePtr(); - QualType modifiedType = getDerived().TransformType(TLB, TL.getModifiedLoc()); + QualType modifiedType = TransformModifiedTypeFn(TLB, TL.getModifiedLoc()); if (modifiedType.isNull()) return QualType(); @@ -7100,6 +7104,15 @@ QualType TreeTransform<Derived>::TransformAttributedType( } template <typename Derived> +QualType TreeTransform<Derived>::TransformAttributedType(TypeLocBuilder &TLB, + AttributedTypeLoc TL) { + return getDerived().TransformAttributedType( + TLB, TL, [&](TypeLocBuilder &TLB, TypeLoc ModifiedLoc) -> QualType { + return getDerived().TransformType(TLB, ModifiedLoc); + }); +} + +template <typename Derived> QualType TreeTransform<Derived>::TransformBTFTagAttributedType( TypeLocBuilder &TLB, BTFTagAttributedTypeLoc TL) { // The BTFTagAttributedType is available for C only. @@ -13600,32 +13613,56 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) { // transformed parameters. TypeSourceInfo *NewCallOpTSI = nullptr; { - TypeSourceInfo *OldCallOpTSI = E->getCallOperator()->getTypeSourceInfo(); - auto OldCallOpFPTL = - OldCallOpTSI->getTypeLoc().getAs<FunctionProtoTypeLoc>(); + auto OldCallOpTypeLoc = + E->getCallOperator()->getTypeSourceInfo()->getTypeLoc(); + + auto TransformFunctionProtoTypeLoc = + [this](TypeLocBuilder &TLB, FunctionProtoTypeLoc FPTL) -> QualType { + SmallVector<QualType, 4> ExceptionStorage; + TreeTransform *This = this; // Work around gcc.gnu.org/PR56135. + return this->TransformFunctionProtoType( + TLB, FPTL, nullptr, Qualifiers(), + [&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) { + return This->TransformExceptionSpec(FPTL.getBeginLoc(), ESI, + ExceptionStorage, Changed); + }); + }; + QualType NewCallOpType; TypeLocBuilder NewCallOpTLBuilder; - SmallVector<QualType, 4> ExceptionStorage; - TreeTransform *This = this; // Work around gcc.gnu.org/PR56135. 
- QualType NewCallOpType = TransformFunctionProtoType( - NewCallOpTLBuilder, OldCallOpFPTL, nullptr, Qualifiers(), - [&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) { - return This->TransformExceptionSpec(OldCallOpFPTL.getBeginLoc(), ESI, - ExceptionStorage, Changed); - }); + + if (auto ATL = OldCallOpTypeLoc.getAs<AttributedTypeLoc>()) { + NewCallOpType = this->TransformAttributedType( + NewCallOpTLBuilder, ATL, + [&](TypeLocBuilder &TLB, TypeLoc TL) -> QualType { + return TransformFunctionProtoTypeLoc( + TLB, TL.castAs<FunctionProtoTypeLoc>()); + }); + } else { + auto FPTL = OldCallOpTypeLoc.castAs<FunctionProtoTypeLoc>(); + NewCallOpType = TransformFunctionProtoTypeLoc(NewCallOpTLBuilder, FPTL); + } + if (NewCallOpType.isNull()) return ExprError(); NewCallOpTSI = NewCallOpTLBuilder.getTypeSourceInfo(getSema().Context, NewCallOpType); } + ArrayRef<ParmVarDecl *> Params; + if (auto ATL = NewCallOpTSI->getTypeLoc().getAs<AttributedTypeLoc>()) { + Params = ATL.getModifiedLoc().castAs<FunctionProtoTypeLoc>().getParams(); + } else { + auto FPTL = NewCallOpTSI->getTypeLoc().castAs<FunctionProtoTypeLoc>(); + Params = FPTL.getParams(); + } + getSema().CompleteLambdaCallOperator( NewCallOperator, E->getCallOperator()->getLocation(), E->getCallOperator()->getInnerLocStart(), E->getCallOperator()->getTrailingRequiresClause(), NewCallOpTSI, E->getCallOperator()->getConstexprKind(), - E->getCallOperator()->getStorageClass(), - NewCallOpTSI->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams(), + E->getCallOperator()->getStorageClass(), Params, E->hasExplicitResultType()); getDerived().transformAttrs(E->getCallOperator(), NewCallOperator); @@ -13648,10 +13685,17 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) { getSema().PushExpressionEvaluationContext( Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + Sema::CodeSynthesisContext C; + C.Kind = clang::Sema::CodeSynthesisContext::LambdaExpressionSubstitution; + C.PointOfInstantiation = E->getBody()->getBeginLoc(); + getSema().pushCodeSynthesisContext(C); + // Instantiate the body of the lambda expression. StmtResult Body = Invalid ? StmtError() : getDerived().TransformLambdaBody(E, E->getBody()); + getSema().popCodeSynthesisContext(); + // ActOnLambda* will pop the function scope for us. FuncScopeCleanup.disable(); diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index 6560fd239ce6..034825d88a44 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -2507,16 +2507,30 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( .ArgConstraint(NotNull(ArgNo(0)))); // char *mkdtemp(char *template); - // FIXME: Improve for errno modeling. addToFunctionSummaryMap( "mkdtemp", Signature(ArgTypes{CharPtrTy}, RetType{CharPtrTy}), - Summary(NoEvalCall).ArgConstraint(NotNull(ArgNo(0)))); + Summary(NoEvalCall) + .Case({ReturnValueCondition(BO_EQ, ArgNo(0))}, + ErrnoMustNotBeChecked, GenericSuccessMsg) + .Case({IsNull(Ret)}, ErrnoNEZeroIrrelevant, GenericFailureMsg) + .ArgConstraint(NotNull(ArgNo(0)))); // char *getcwd(char *buf, size_t size); - // FIXME: Improve for errno modeling. 
addToFunctionSummaryMap( "getcwd", Signature(ArgTypes{CharPtrTy, SizeTy}, RetType{CharPtrTy}), Summary(NoEvalCall) + .Case({ArgumentCondition(1, WithinRange, Range(1, SizeMax)), + ReturnValueCondition(BO_EQ, ArgNo(0))}, + ErrnoMustNotBeChecked, GenericSuccessMsg) + .Case({ArgumentCondition(1, WithinRange, SingleValue(0)), + IsNull(Ret)}, + ErrnoNEZeroIrrelevant, "Assuming that argument 'size' is 0") + .Case({ArgumentCondition(1, WithinRange, Range(1, SizeMax)), + IsNull(Ret)}, + ErrnoNEZeroIrrelevant, GenericFailureMsg) + .ArgConstraint(NotNull(ArgNo(0))) + .ArgConstraint( + BufferSize(/*Buffer*/ ArgNo(0), /*BufSize*/ ArgNo(1))) .ArgConstraint( ArgumentCondition(1, WithinRange, Range(0, SizeMax)))); diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 254b36ed0396..25da3c18e851 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -239,6 +239,7 @@ public: private: CallDescriptionMap<FnDescription> FnDescriptions = { {{{"fopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, + {{{"fdopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, {{{"freopen"}, 3}, {&StreamChecker::preFreopen, &StreamChecker::evalFreopen, 2}}, {{{"tmpfile"}, 0}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index d192c7f42939..d82cd5e886e4 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -554,6 +554,8 @@ int ClangTool::run(ToolAction *Action) { << CWD.getError().message() << "\n"; } + size_t NumOfTotalFiles = AbsolutePaths.size(); + unsigned ProcessedFileCounter = 0; for (llvm::StringRef File : AbsolutePaths) { // Currently implementations of CompilationDatabase::getCompileCommands can // change the state of the file system (e.g. prepare generated headers), so @@ -609,7 +611,11 @@ int ClangTool::run(ToolAction *Action) { // FIXME: We need a callback mechanism for the tool writer to output a // customized message for each file. - LLVM_DEBUG({ llvm::dbgs() << "Processing: " << File << ".\n"; }); + if (NumOfTotalFiles > 1) + llvm::errs() << "[" + std::to_string(++ProcessedFileCounter) + "/" + + std::to_string(NumOfTotalFiles) + + "] Processing file " + File + << ".\n"; ToolInvocation Invocation(std::move(CommandLine), Action, Files.get(), PCHContainerOps); Invocation.setDiagnosticConsumer(DiagConsumer); diff --git a/clang/utils/TableGen/ClangASTNodesEmitter.cpp b/clang/utils/TableGen/ClangASTNodesEmitter.cpp index 16a1c74b9d91..07ddafce3291 100644 --- a/clang/utils/TableGen/ClangASTNodesEmitter.cpp +++ b/clang/utils/TableGen/ClangASTNodesEmitter.cpp @@ -33,6 +33,7 @@ class ClangASTNodesEmitter { typedef std::multimap<ASTNode, ASTNode> ChildMap; typedef ChildMap::const_iterator ChildIterator; + std::set<ASTNode> PrioritizedClasses; RecordKeeper &Records; ASTNode Root; const std::string &NodeClassName; @@ -70,8 +71,16 @@ class ClangASTNodesEmitter { std::pair<ASTNode, ASTNode> EmitNode(raw_ostream& OS, ASTNode Base); public: explicit ClangASTNodesEmitter(RecordKeeper &R, const std::string &N, - const std::string &S) - : Records(R), NodeClassName(N), BaseSuffix(S) {} + const std::string &S, + std::string_view PriorizeIfSubclassOf) + : Records(R), NodeClassName(N), BaseSuffix(S) { + auto vecPrioritized = + PriorizeIfSubclassOf.empty() + ? 
std::vector<Record *>{} + : R.getAllDerivedDefinitions(PriorizeIfSubclassOf); + PrioritizedClasses = + std::set<ASTNode>(vecPrioritized.begin(), vecPrioritized.end()); + } // run - Output the .inc file contents void run(raw_ostream &OS); @@ -95,8 +104,23 @@ std::pair<ASTNode, ASTNode> ClangASTNodesEmitter::EmitNode(raw_ostream &OS, if (!Base.isAbstract()) First = Last = Base; + auto comp = [this](ASTNode LHS, ASTNode RHS) { + auto LHSPrioritized = PrioritizedClasses.count(LHS) > 0; + auto RHSPrioritized = PrioritizedClasses.count(RHS) > 0; + if (LHSPrioritized && !RHSPrioritized) + return true; + if (!LHSPrioritized && RHSPrioritized) + return false; + + return LHS.getName() > RHS.getName(); + }; + auto SortedChildren = std::set<ASTNode, decltype(comp)>(comp); + for (; i != e; ++i) { - ASTNode Child = i->second; + SortedChildren.insert(i->second); + } + + for (const auto &Child : SortedChildren) { bool Abstract = Child.isAbstract(); std::string NodeName = macroName(std::string(Child.getName())); @@ -148,9 +172,7 @@ void ClangASTNodesEmitter::deriveChildTree() { const std::vector<Record*> Stmts = Records.getAllDerivedDefinitions(NodeClassName); - for (unsigned i = 0, e = Stmts.size(); i != e; ++i) { - Record *R = Stmts[i]; - + for (auto *R : Stmts) { if (auto B = R->getValueAsOptionalDef(BaseFieldName)) Tree.insert(std::make_pair(B, R)); else if (Root) @@ -182,9 +204,9 @@ void ClangASTNodesEmitter::run(raw_ostream &OS) { OS << "#endif\n\n"; OS << "#ifndef LAST_" << macroHierarchyName() << "_RANGE\n"; - OS << "# define LAST_" - << macroHierarchyName() << "_RANGE(Base, First, Last) " - << macroHierarchyName() << "_RANGE(Base, First, Last)\n"; + OS << "# define LAST_" << macroHierarchyName() + << "_RANGE(Base, First, Last) " << macroHierarchyName() + << "_RANGE(Base, First, Last)\n"; OS << "#endif\n\n"; EmitNode(OS, Root); @@ -196,8 +218,20 @@ void ClangASTNodesEmitter::run(raw_ostream &OS) { } void clang::EmitClangASTNodes(RecordKeeper &RK, raw_ostream &OS, - const std::string &N, const std::string &S) { - ClangASTNodesEmitter(RK, N, S).run(OS); + const std::string &N, const std::string &S, + std::string_view PriorizeIfSubclassOf) { + ClangASTNodesEmitter(RK, N, S, PriorizeIfSubclassOf).run(OS); +} + +void printDeclContext(const std::multimap<Record *, Record *> &Tree, + Record *DeclContext, raw_ostream &OS) { + if (!DeclContext->getValueAsBit(AbstractFieldName)) + OS << "DECL_CONTEXT(" << DeclContext->getName() << ")\n"; + auto i = Tree.lower_bound(DeclContext); + auto end = Tree.upper_bound(DeclContext); + for (; i != end; ++i) { + printDeclContext(Tree, i->second, OS); + } } // Emits and addendum to a .inc file to enumerate the clang declaration @@ -210,38 +244,25 @@ void clang::EmitClangDeclContext(RecordKeeper &Records, raw_ostream &OS) { OS << "#ifndef DECL_CONTEXT\n"; OS << "# define DECL_CONTEXT(DECL)\n"; OS << "#endif\n"; - - OS << "#ifndef DECL_CONTEXT_BASE\n"; - OS << "# define DECL_CONTEXT_BASE(DECL) DECL_CONTEXT(DECL)\n"; - OS << "#endif\n"; - - typedef std::set<Record*> RecordSet; - typedef std::vector<Record*> RecordVector; - - RecordVector DeclContextsVector - = Records.getAllDerivedDefinitions(DeclContextNodeClassName); - RecordVector Decls = Records.getAllDerivedDefinitions(DeclNodeClassName); - RecordSet DeclContexts (DeclContextsVector.begin(), DeclContextsVector.end()); - - for (RecordVector::iterator i = Decls.begin(), e = Decls.end(); i != e; ++i) { - Record *R = *i; - - if (Record *B = R->getValueAsOptionalDef(BaseFieldName)) { - if (DeclContexts.find(B) != 
DeclContexts.end()) { - OS << "DECL_CONTEXT_BASE(" << B->getName() << ")\n"; - DeclContexts.erase(B); - } - } + + std::vector<Record *> DeclContextsVector = + Records.getAllDerivedDefinitions(DeclContextNodeClassName); + std::vector<Record *> Decls = + Records.getAllDerivedDefinitions(DeclNodeClassName); + + std::multimap<Record *, Record *> Tree; + + const std::vector<Record *> Stmts = + Records.getAllDerivedDefinitions(DeclNodeClassName); + + for (auto *R : Stmts) { + if (auto *B = R->getValueAsOptionalDef(BaseFieldName)) + Tree.insert(std::make_pair(B, R)); } - // To keep identical order, RecordVector may be used - // instead of RecordSet. - for (RecordVector::iterator - i = DeclContextsVector.begin(), e = DeclContextsVector.end(); - i != e; ++i) - if (DeclContexts.find(*i) != DeclContexts.end()) - OS << "DECL_CONTEXT(" << (*i)->getName() << ")\n"; + for (auto *DeclContext : DeclContextsVector) { + printDeclContext(Tree, DeclContext, OS); + } OS << "#undef DECL_CONTEXT\n"; - OS << "#undef DECL_CONTEXT_BASE\n"; } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 6c302da106a2..5de2223e71b0 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1773,11 +1773,14 @@ void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { llvm::StringMap<std::set<std::string>> StreamingMap; uint64_t IsStreamingFlag = getEnumValueForFlag("IsStreaming"); + uint64_t IsStreamingOrSVE2p1Flag = getEnumValueForFlag("IsStreamingOrSVE2p1"); uint64_t IsStreamingCompatibleFlag = getEnumValueForFlag("IsStreamingCompatible"); for (auto &Def : Defs) { if (Def->isFlagSet(IsStreamingFlag)) StreamingMap["ArmStreaming"].insert(Def->getMangledName()); + else if (Def->isFlagSet(IsStreamingOrSVE2p1Flag)) + StreamingMap["ArmStreamingOrSVE2p1"].insert(Def->getMangledName()); else if (Def->isFlagSet(IsStreamingCompatibleFlag)) StreamingMap["ArmStreamingCompatible"].insert(Def->getMangledName()); else diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index c1f2ca15b595..3859555d647f 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -398,7 +398,8 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { EmitClangASTNodes(Records, OS, CommentNodeClassName, ""); break; case GenClangDeclNodes: - EmitClangASTNodes(Records, OS, DeclNodeClassName, "Decl"); + EmitClangASTNodes(Records, OS, DeclNodeClassName, "Decl", + DeclContextNodeClassName); EmitClangDeclContext(Records, OS); break; case GenClangStmtNodes: diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index 35f2f04c1e81..faa0c5d2cff9 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -25,8 +25,16 @@ class RecordKeeper; namespace clang { void EmitClangDeclContext(llvm::RecordKeeper &RK, llvm::raw_ostream &OS); +/** + @param PriorizeIfSubclassOf These classes should be prioritized in the output. + This is useful to force enum generation/jump tables/lookup tables to be more + compact in both size and surrounding code in hot functions. An example use is + in Decl for classes that inherit from DeclContext, for functions like + castFromDeclContext. 
+ */ void EmitClangASTNodes(llvm::RecordKeeper &RK, llvm::raw_ostream &OS, - const std::string &N, const std::string &S); + const std::string &N, const std::string &S, + std::string_view PriorizeIfSubclassOf = ""); void EmitClangBasicReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangBasicWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangTypeNodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); diff --git a/compiler-rt/lib/asan/asan_suppressions.cpp b/compiler-rt/lib/asan/asan_suppressions.cpp index 8cb2c3e3b9b6..e71d23182186 100644 --- a/compiler-rt/lib/asan/asan_suppressions.cpp +++ b/compiler-rt/lib/asan/asan_suppressions.cpp @@ -81,9 +81,10 @@ bool IsStackTraceSuppressed(const StackTrace *stack) { } if (suppression_ctx->HasSuppressionType(kInterceptorViaFunction)) { - SymbolizedStack *frames = symbolizer->SymbolizePC(addr); + SymbolizedStackHolder symbolized_stack(symbolizer->SymbolizePC(addr)); + const SymbolizedStack *frames = symbolized_stack.get(); CHECK(frames); - for (SymbolizedStack *cur = frames; cur; cur = cur->next) { + for (const SymbolizedStack *cur = frames; cur; cur = cur->next) { const char *function_name = cur->info.function; if (!function_name) { continue; @@ -91,11 +92,9 @@ bool IsStackTraceSuppressed(const StackTrace *stack) { // Match "interceptor_via_fun" suppressions. if (suppression_ctx->Match(function_name, kInterceptorViaFunction, &s)) { - frames->ClearAll(); return true; } } - frames->ClearAll(); } } return false; diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index 1a018a891b56..784cfb904aa2 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -292,12 +292,14 @@ static void PrintStackAllocations(const StackAllocationsRingBuffer *sa, uptr pc = record & pc_mask; frame_desc.AppendF(" record_addr:0x%zx record:0x%zx", reinterpret_cast<uptr>(record_addr), record); - if (SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc)) { + SymbolizedStackHolder symbolized_stack( + Symbolizer::GetOrInit()->SymbolizePC(pc)); + const SymbolizedStack *frame = symbolized_stack.get(); + if (frame) { StackTracePrinter::GetOrInit()->RenderFrame( &frame_desc, " %F %L", 0, frame->info.address, &frame->info, common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix); - frame->ClearAll(); } Printf("%s\n", frame_desc.data()); frame_desc.clear(); diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index e24839c984b3..0ecded8b28cd 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -155,14 +155,15 @@ Suppression *LeakSuppressionContext::GetSuppressionForAddr(uptr addr) { return s; // Suppress by file or function name. 
- SymbolizedStack *frames = Symbolizer::GetOrInit()->SymbolizePC(addr); - for (SymbolizedStack *cur = frames; cur; cur = cur->next) { + SymbolizedStackHolder symbolized_stack( + Symbolizer::GetOrInit()->SymbolizePC(addr)); + const SymbolizedStack *frames = symbolized_stack.get(); + for (const SymbolizedStack *cur = frames; cur; cur = cur->next) { if (context.Match(cur->info.function, kSuppressionLeak, &s) || context.Match(cur->info.file, kSuppressionLeak, &s)) { break; } } - frames->ClearAll(); return s; } diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h index 25fa2212bdad..710447a3e1a3 100644 --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -255,18 +255,19 @@ char *GetProcSelfMaps(); void InitializeInterceptors(); void MsanAllocatorInit(); -void MsanDeallocate(StackTrace *stack, void *ptr); - -void *msan_malloc(uptr size, StackTrace *stack); -void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack); -void *msan_realloc(void *ptr, uptr size, StackTrace *stack); -void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, StackTrace *stack); -void *msan_valloc(uptr size, StackTrace *stack); -void *msan_pvalloc(uptr size, StackTrace *stack); -void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack); -void *msan_memalign(uptr alignment, uptr size, StackTrace *stack); +void MsanDeallocate(BufferedStackTrace *stack, void *ptr); + +void *msan_malloc(uptr size, BufferedStackTrace *stack); +void *msan_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack); +void *msan_realloc(void *ptr, uptr size, BufferedStackTrace *stack); +void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, + BufferedStackTrace *stack); +void *msan_valloc(uptr size, BufferedStackTrace *stack); +void *msan_pvalloc(uptr size, BufferedStackTrace *stack); +void *msan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack); +void *msan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack); int msan_posix_memalign(void **memptr, uptr alignment, uptr size, - StackTrace *stack); + BufferedStackTrace *stack); void InstallTrapHandler(); void InstallAtExitHandler(); @@ -321,6 +322,17 @@ const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1; stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal); \ } +#define GET_FATAL_STACK_TRACE \ + GET_FATAL_STACK_TRACE_PC_BP(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME()) + +// Unwind the stack for fatal error, as the parameter `stack` is +// empty without origins. 
+#define GET_FATAL_STACK_TRACE_IF_EMPTY(STACK) \ + if (msan_inited && (STACK)->size == 0) { \ + (STACK)->Unwind(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(), nullptr, \ + common_flags()->fast_unwind_on_fatal); \ + } + class ScopedThreadLocalStateBackup { public: ScopedThreadLocalStateBackup() { Backup(); } diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp index 72a7f980d39f..0b2dd2b2f188 100644 --- a/compiler-rt/lib/msan/msan_allocator.cpp +++ b/compiler-rt/lib/msan/msan_allocator.cpp @@ -178,18 +178,20 @@ void MsanThreadLocalMallocStorage::CommitBack() { allocator.DestroyCache(GetAllocatorCache(this)); } -static void *MsanAllocate(StackTrace *stack, uptr size, uptr alignment, +static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment, bool zeroise) { - if (size > max_malloc_size) { + if (UNLIKELY(size > max_malloc_size)) { if (AllocatorMayReturnNull()) { Report("WARNING: MemorySanitizer failed to allocate 0x%zx bytes\n", size); return nullptr; } + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportAllocationSizeTooBig(size, max_malloc_size, stack); } if (UNLIKELY(IsRssLimitExceeded())) { if (AllocatorMayReturnNull()) return nullptr; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportRssLimitExceeded(stack); } MsanThread *t = GetCurrentThread(); @@ -206,6 +208,7 @@ static void *MsanAllocate(StackTrace *stack, uptr size, uptr alignment, SetAllocatorOutOfMemory(); if (AllocatorMayReturnNull()) return nullptr; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportOutOfMemory(size, stack); } Metadata *meta = @@ -229,7 +232,7 @@ static void *MsanAllocate(StackTrace *stack, uptr size, uptr alignment, return allocated; } -void MsanDeallocate(StackTrace *stack, void *p) { +void MsanDeallocate(BufferedStackTrace *stack, void *p) { CHECK(p); UnpoisonParam(1); RunFreeHooks(p); @@ -259,8 +262,8 @@ void MsanDeallocate(StackTrace *stack, void *p) { } } -static void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size, - uptr alignment) { +static void *MsanReallocate(BufferedStackTrace *stack, void *old_p, + uptr new_size, uptr alignment) { Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(old_p)); uptr old_size = meta->requested_size; uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(old_p); @@ -284,10 +287,11 @@ static void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size, return new_p; } -static void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) { +static void *MsanCalloc(BufferedStackTrace *stack, uptr nmemb, uptr size) { if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { if (AllocatorMayReturnNull()) return nullptr; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportCallocOverflow(nmemb, size, stack); } return MsanAllocate(stack, nmemb * size, sizeof(u64), true); @@ -320,15 +324,15 @@ static uptr AllocationSizeFast(const void *p) { return reinterpret_cast<Metadata *>(allocator.GetMetaData(p))->requested_size; } -void *msan_malloc(uptr size, StackTrace *stack) { +void *msan_malloc(uptr size, BufferedStackTrace *stack) { return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false)); } -void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack) { +void *msan_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) { return SetErrnoOnNull(MsanCalloc(stack, nmemb, size)); } -void *msan_realloc(void *ptr, uptr size, StackTrace *stack) { +void *msan_realloc(void *ptr, uptr size, BufferedStackTrace *stack) { if (!ptr) return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false)); 
if (size == 0) { @@ -338,26 +342,29 @@ void *msan_realloc(void *ptr, uptr size, StackTrace *stack) { return SetErrnoOnNull(MsanReallocate(stack, ptr, size, sizeof(u64))); } -void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, StackTrace *stack) { +void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, + BufferedStackTrace *stack) { if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { errno = errno_ENOMEM; if (AllocatorMayReturnNull()) return nullptr; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportReallocArrayOverflow(nmemb, size, stack); } return msan_realloc(ptr, nmemb * size, stack); } -void *msan_valloc(uptr size, StackTrace *stack) { +void *msan_valloc(uptr size, BufferedStackTrace *stack) { return SetErrnoOnNull(MsanAllocate(stack, size, GetPageSizeCached(), false)); } -void *msan_pvalloc(uptr size, StackTrace *stack) { +void *msan_pvalloc(uptr size, BufferedStackTrace *stack) { uptr PageSize = GetPageSizeCached(); if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) { errno = errno_ENOMEM; if (AllocatorMayReturnNull()) return nullptr; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportPvallocOverflow(size, stack); } // pvalloc(0) should allocate one page. @@ -365,31 +372,34 @@ void *msan_pvalloc(uptr size, StackTrace *stack) { return SetErrnoOnNull(MsanAllocate(stack, size, PageSize, false)); } -void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) { +void *msan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack) { if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) { errno = errno_EINVAL; if (AllocatorMayReturnNull()) return nullptr; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportInvalidAlignedAllocAlignment(size, alignment, stack); } return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false)); } -void *msan_memalign(uptr alignment, uptr size, StackTrace *stack) { +void *msan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack) { if (UNLIKELY(!IsPowerOfTwo(alignment))) { errno = errno_EINVAL; if (AllocatorMayReturnNull()) return nullptr; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportInvalidAllocationAlignment(alignment, stack); } return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false)); } int msan_posix_memalign(void **memptr, uptr alignment, uptr size, - StackTrace *stack) { + BufferedStackTrace *stack) { if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { if (AllocatorMayReturnNull()) return errno_EINVAL; + GET_FATAL_STACK_TRACE_IF_EMPTY(stack); ReportInvalidPosixMemalignAlignment(alignment, stack); } void *ptr = MsanAllocate(stack, size, alignment, false); diff --git a/compiler-rt/lib/msan/msan_new_delete.cpp b/compiler-rt/lib/msan/msan_new_delete.cpp index d4e95c0f6513..7daa55474b7d 100644 --- a/compiler-rt/lib/msan/msan_new_delete.cpp +++ b/compiler-rt/lib/msan/msan_new_delete.cpp @@ -30,16 +30,22 @@ namespace std { // TODO(alekseys): throw std::bad_alloc instead of dying on OOM. 
-#define OPERATOR_NEW_BODY(nothrow) \ - GET_MALLOC_STACK_TRACE; \ - void *res = msan_malloc(size, &stack);\ - if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\ - return res -#define OPERATOR_NEW_BODY_ALIGN(nothrow) \ - GET_MALLOC_STACK_TRACE;\ - void *res = msan_memalign((uptr)align, size, &stack);\ - if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\ - return res; +# define OPERATOR_NEW_BODY(nothrow) \ + GET_MALLOC_STACK_TRACE; \ + void *res = msan_malloc(size, &stack); \ + if (!nothrow && UNLIKELY(!res)) { \ + GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); \ + ReportOutOfMemory(size, &stack); \ + } \ + return res +# define OPERATOR_NEW_BODY_ALIGN(nothrow) \ + GET_MALLOC_STACK_TRACE; \ + void *res = msan_memalign((uptr)align, size, &stack); \ + if (!nothrow && UNLIKELY(!res)) { \ + GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); \ + ReportOutOfMemory(size, &stack); \ + } \ + return res; INTERCEPTOR_ATTRIBUTE void *operator new(size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); } diff --git a/compiler-rt/lib/orc/executor_symbol_def.h b/compiler-rt/lib/orc/executor_symbol_def.h new file mode 100644 index 000000000000..454cefe525cf --- /dev/null +++ b/compiler-rt/lib/orc/executor_symbol_def.h @@ -0,0 +1,151 @@ +//===--------- ExecutorSymbolDef.h - (Addr, Flags) pair ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Represents a defining location for a symbol in the executing program. +// +// This file was derived from +// llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h. +// +//===----------------------------------------------------------------------===// + +#ifndef ORC_RT_EXECUTOR_SYMBOL_DEF_H +#define ORC_RT_EXECUTOR_SYMBOL_DEF_H + +#include "bitmask_enum.h" +#include "executor_address.h" +#include "simple_packed_serialization.h" + +namespace __orc_rt { + +/// Flags for symbols in the JIT. +class JITSymbolFlags { +public: + using UnderlyingType = uint8_t; + using TargetFlagsType = uint8_t; + + /// These values must be kept in sync with \c JITSymbolFlags in the JIT. + enum FlagNames : UnderlyingType { + None = 0, + HasError = 1U << 0, + Weak = 1U << 1, + Common = 1U << 2, + Absolute = 1U << 3, + Exported = 1U << 4, + Callable = 1U << 5, + MaterializationSideEffectsOnly = 1U << 6, + ORC_RT_MARK_AS_BITMASK_ENUM( // LargestValue = + MaterializationSideEffectsOnly) + }; + + /// Default-construct a JITSymbolFlags instance. + JITSymbolFlags() = default; + + /// Construct a JITSymbolFlags instance from the given flags and target + /// flags. + JITSymbolFlags(FlagNames Flags, TargetFlagsType TargetFlags) + : TargetFlags(TargetFlags), Flags(Flags) {} + + bool operator==(const JITSymbolFlags &RHS) const { + return Flags == RHS.Flags && TargetFlags == RHS.TargetFlags; + } + + /// Get the underlying flags value as an integer. + UnderlyingType getRawFlagsValue() const { + return static_cast<UnderlyingType>(Flags); + } + + /// Return a reference to the target-specific flags. + TargetFlagsType &getTargetFlags() { return TargetFlags; } + + /// Return a reference to the target-specific flags. + const TargetFlagsType &getTargetFlags() const { return TargetFlags; } + +private: + TargetFlagsType TargetFlags = 0; + FlagNames Flags = None; +}; + +/// Represents a defining location for a JIT symbol. 
+class ExecutorSymbolDef { +public: + ExecutorSymbolDef() = default; + ExecutorSymbolDef(ExecutorAddr Addr, JITSymbolFlags Flags) + : Addr(Addr), Flags(Flags) {} + + const ExecutorAddr &getAddress() const { return Addr; } + + const JITSymbolFlags &getFlags() const { return Flags; } + + friend bool operator==(const ExecutorSymbolDef &LHS, + const ExecutorSymbolDef &RHS) { + return LHS.getAddress() == RHS.getAddress() && + LHS.getFlags() == RHS.getFlags(); + } + +private: + ExecutorAddr Addr; + JITSymbolFlags Flags; +}; + +using SPSJITSymbolFlags = + SPSTuple<JITSymbolFlags::UnderlyingType, JITSymbolFlags::TargetFlagsType>; + +/// SPS serializatior for JITSymbolFlags. +template <> class SPSSerializationTraits<SPSJITSymbolFlags, JITSymbolFlags> { + using FlagsArgList = SPSJITSymbolFlags::AsArgList; + +public: + static size_t size(const JITSymbolFlags &F) { + return FlagsArgList::size(F.getRawFlagsValue(), F.getTargetFlags()); + } + + static bool serialize(SPSOutputBuffer &BOB, const JITSymbolFlags &F) { + return FlagsArgList::serialize(BOB, F.getRawFlagsValue(), + F.getTargetFlags()); + } + + static bool deserialize(SPSInputBuffer &BIB, JITSymbolFlags &F) { + JITSymbolFlags::UnderlyingType RawFlags; + JITSymbolFlags::TargetFlagsType TargetFlags; + if (!FlagsArgList::deserialize(BIB, RawFlags, TargetFlags)) + return false; + F = JITSymbolFlags{static_cast<JITSymbolFlags::FlagNames>(RawFlags), + TargetFlags}; + return true; + } +}; + +using SPSExecutorSymbolDef = SPSTuple<SPSExecutorAddr, SPSJITSymbolFlags>; + +/// SPS serializatior for ExecutorSymbolDef. +template <> +class SPSSerializationTraits<SPSExecutorSymbolDef, ExecutorSymbolDef> { + using DefArgList = SPSExecutorSymbolDef::AsArgList; + +public: + static size_t size(const ExecutorSymbolDef &ESD) { + return DefArgList::size(ESD.getAddress(), ESD.getFlags()); + } + + static bool serialize(SPSOutputBuffer &BOB, const ExecutorSymbolDef &ESD) { + return DefArgList::serialize(BOB, ESD.getAddress(), ESD.getFlags()); + } + + static bool deserialize(SPSInputBuffer &BIB, ExecutorSymbolDef &ESD) { + ExecutorAddr Addr; + JITSymbolFlags Flags; + if (!DefArgList::deserialize(BIB, Addr, Flags)) + return false; + ESD = ExecutorSymbolDef{Addr, Flags}; + return true; + } +}; + +} // End namespace __orc_rt + +#endif // ORC_RT_EXECUTOR_SYMBOL_DEF_H diff --git a/compiler-rt/lib/orc/tests/unit/executor_symbol_def_test.cpp b/compiler-rt/lib/orc/tests/unit/executor_symbol_def_test.cpp new file mode 100644 index 000000000000..181091ca1e60 --- /dev/null +++ b/compiler-rt/lib/orc/tests/unit/executor_symbol_def_test.cpp @@ -0,0 +1,19 @@ +//===-- executor_symbol_def_test.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "executor_symbol_def.h" +#include "simple_packed_serialization_utils.h" +#include "gtest/gtest.h" + +using namespace __orc_rt; + +TEST(ExecutorSymbolDefTest, Serialization) { + blobSerializationRoundTrip<SPSExecutorSymbolDef>(ExecutorSymbolDef{}); + blobSerializationRoundTrip<SPSExecutorSymbolDef>( + ExecutorSymbolDef{ExecutorAddr{0x70}, {JITSymbolFlags::Callable, 9}}); +}
\ No newline at end of file diff --git a/compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp b/compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp index e7a78062b210..397114b4017e 100644 --- a/compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp +++ b/compiler-rt/lib/orc/tests/unit/simple_packed_serialization_test.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "simple_packed_serialization.h" +#include "simple_packed_serialization_utils.h" #include "gtest/gtest.h" using namespace __orc_rt; @@ -48,25 +49,6 @@ TEST(SimplePackedSerializationTest, SPSInputBuffer) { EXPECT_FALSE(IB.read(&C, 1)); } -template <typename SPSTagT, typename T> -static void blobSerializationRoundTrip(const T &Value) { - using BST = SPSSerializationTraits<SPSTagT, T>; - - size_t Size = BST::size(Value); - auto Buffer = std::make_unique<char[]>(Size); - SPSOutputBuffer OB(Buffer.get(), Size); - - EXPECT_TRUE(BST::serialize(OB, Value)); - - SPSInputBuffer IB(Buffer.get(), Size); - - T DSValue; - EXPECT_TRUE(BST::deserialize(IB, DSValue)); - - EXPECT_EQ(Value, DSValue) - << "Incorrect value after serialization/deserialization round-trip"; -} - template <typename T> static void testFixedIntegralTypeSerialization() { blobSerializationRoundTrip<T, T>(0); blobSerializationRoundTrip<T, T>(static_cast<T>(1)); diff --git a/compiler-rt/lib/orc/tests/unit/simple_packed_serialization_utils.h b/compiler-rt/lib/orc/tests/unit/simple_packed_serialization_utils.h new file mode 100644 index 000000000000..746be43d250b --- /dev/null +++ b/compiler-rt/lib/orc/tests/unit/simple_packed_serialization_utils.h @@ -0,0 +1,34 @@ +//===-- simple_packed_serialization_utils.h -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef ORC_RT_TEST_SIMPLE_PACKED_SERIALIZATION_UTILS_H +#define ORC_RT_TEST_SIMPLE_PACKED_SERIALIZATION_UTILS_H + +#include "simple_packed_serialization.h" +#include "gtest/gtest.h" + +template <typename SPSTagT, typename T> +static void blobSerializationRoundTrip(const T &Value) { + using BST = __orc_rt::SPSSerializationTraits<SPSTagT, T>; + + size_t Size = BST::size(Value); + auto Buffer = std::make_unique<char[]>(Size); + __orc_rt::SPSOutputBuffer OB(Buffer.get(), Size); + + EXPECT_TRUE(BST::serialize(OB, Value)); + + __orc_rt::SPSInputBuffer IB(Buffer.get(), Size); + + T DSValue; + EXPECT_TRUE(BST::deserialize(IB, DSValue)); + + EXPECT_EQ(Value, DSValue) + << "Incorrect value after serialization/deserialization round-trip"; +} + +#endif // ORC_RT_TEST_SIMPLE_PACKED_SERIALIZATION_UTILS_H
\ No newline at end of file diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 6b327a4aa16f..b99c0cffcbb1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -32,6 +32,7 @@ struct AddressInfo; struct BufferedStackTrace; struct SignalContext; struct StackTrace; +struct SymbolizedStack; // Constants. const uptr kWordSize = SANITIZER_WORDSIZE / 8; @@ -393,6 +394,8 @@ void ReportErrorSummary(const char *error_type, const AddressInfo &info, // Same as above, but obtains AddressInfo by symbolizing top stack trace frame. void ReportErrorSummary(const char *error_type, const StackTrace *trace, const char *alt_tool_name = nullptr); +// Skips frames which we consider internal and not usefull to the users. +const SymbolizedStack *SkipInternalFrames(const SymbolizedStack *frames); void ReportMmapWriteExec(int prot, int mflags); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 289ae661c343..0ce4e9351bc1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -191,7 +191,8 @@ #define SANITIZER_INTERCEPT_PREADV \ (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) -#define SANITIZER_INTERCEPT_PWRITEV SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_PWRITEV \ + (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_PREADV64 SI_GLIBC #define SANITIZER_INTERCEPT_PWRITEV64 SI_GLIBC diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp index 9a4c80fcfdd1..561eae9ab780 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp @@ -33,13 +33,14 @@ class StackTraceTextPrinter { stack_trace_fmt)) {} bool ProcessAddressFrames(uptr pc) { - SymbolizedStack *frames = symbolize_ - ? Symbolizer::GetOrInit()->SymbolizePC(pc) - : SymbolizedStack::New(pc); + SymbolizedStackHolder symbolized_stack( + symbolize_ ? Symbolizer::GetOrInit()->SymbolizePC(pc) + : SymbolizedStack::New(pc)); + const SymbolizedStack *frames = symbolized_stack.get(); if (!frames) return false; - for (SymbolizedStack *cur = frames; cur; cur = cur->next) { + for (const SymbolizedStack *cur = frames; cur; cur = cur->next) { uptr prev_len = output_->length(); StackTracePrinter::GetOrInit()->RenderFrame( output_, stack_trace_fmt_, frame_num_++, cur->info.address, @@ -51,13 +52,12 @@ class StackTraceTextPrinter { ExtendDedupToken(cur); } - frames->ClearAll(); return true; } private: // Extend the dedup token by appending a new frame. 
- void ExtendDedupToken(SymbolizedStack *stack) { + void ExtendDedupToken(const SymbolizedStack *stack) { if (!dedup_token_) return; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h index 82cd9bc22791..16ef2f2fd717 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h @@ -64,6 +64,26 @@ struct SymbolizedStack { SymbolizedStack(); }; +class SymbolizedStackHolder { + SymbolizedStack *Stack; + + void clear() { + if (Stack) + Stack->ClearAll(); + } + + public: + explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr) + : Stack(Stack) {} + ~SymbolizedStackHolder() { clear(); } + void reset(SymbolizedStack *S = nullptr) { + if (Stack != S) + clear(); + Stack = S; + } + const SymbolizedStack *get() const { return Stack; } +}; + // For now, DataInfo is used to describe global variable. struct DataInfo { // Owns all the string members. Storage for them is diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp index 3e4417ae3f57..0cf250f72129 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp @@ -28,6 +28,26 @@ namespace __sanitizer { #if !SANITIZER_GO + +static bool FrameIsInternal(const SymbolizedStack *frame) { + if (!frame) + return true; + const char *file = frame->info.file; + const char *module = frame->info.module; + if (file && (internal_strstr(file, "/compiler-rt/lib/"))) + return true; + if (module && (internal_strstr(module, "libclang_rt."))) + return true; + return false; +} + +const SymbolizedStack *SkipInternalFrames(const SymbolizedStack *frames) { + for (const SymbolizedStack *f = frames; f; f = f->next) + if (!FrameIsInternal(f)) + return f; + return nullptr; +} + void ReportErrorSummary(const char *error_type, const AddressInfo &info, const char *alt_tool_name) { if (!common_flags()->print_summary) return; @@ -82,9 +102,10 @@ void ReportErrorSummary(const char *error_type, const StackTrace *stack, // Currently, we include the first stack frame into the report summary. // Maybe sometimes we need to choose another frame (e.g. skip memcpy/etc). 
uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[0]); - SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc); + SymbolizedStackHolder symbolized_stack( + Symbolizer::GetOrInit()->SymbolizePC(pc)); + const SymbolizedStack *frame = symbolized_stack.get(); ReportErrorSummary(error_type, frame->info, alt_tool_name); - frame->ClearAll(); #endif } diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_report.cpp index 35cb6710a54f..22ba428cc58b 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_report.cpp @@ -273,26 +273,10 @@ static ReportStack *ChooseSummaryStack(const ReportDesc *rep) { return 0; } -static bool FrameIsInternal(const SymbolizedStack *frame) { - if (frame == 0) - return false; - const char *file = frame->info.file; - const char *module = frame->info.module; - if (file != 0 && - (internal_strstr(file, "tsan_interceptors_posix.cpp") || - internal_strstr(file, "tsan_interceptors_memintrinsics.cpp") || - internal_strstr(file, "sanitizer_common_interceptors.inc") || - internal_strstr(file, "tsan_interface_"))) - return true; - if (module != 0 && (internal_strstr(module, "libclang_rt.tsan_"))) - return true; - return false; -} - -static SymbolizedStack *SkipTsanInternalFrames(SymbolizedStack *frames) { - while (FrameIsInternal(frames) && frames->next) - frames = frames->next; - return frames; +static const SymbolizedStack *SkipTsanInternalFrames(SymbolizedStack *frames) { + if (const SymbolizedStack *f = SkipInternalFrames(frames)) + return f; + return frames; // Fallback to the top frame. } void PrintReport(const ReportDesc *rep) { @@ -366,7 +350,7 @@ void PrintReport(const ReportDesc *rep) { Printf(" And %d more similar thread leaks.\n\n", rep->count - 1); if (ReportStack *stack = ChooseSummaryStack(rep)) { - if (SymbolizedStack *frame = SkipTsanInternalFrames(stack->frames)) + if (const SymbolizedStack *frame = SkipTsanInternalFrames(stack->frames)) ReportErrorSummary(rep_typ_str, frame->info); } diff --git a/compiler-rt/lib/ubsan/ubsan_diag.cpp b/compiler-rt/lib/ubsan/ubsan_diag.cpp index aac270415318..67e884e4916c 100644 --- a/compiler-rt/lib/ubsan/ubsan_diag.cpp +++ b/compiler-rt/lib/ubsan/ubsan_diag.cpp @@ -88,7 +88,7 @@ static void MaybeReportErrorSummary(Location Loc, ErrorType Type) { AI.file = internal_strdup(SLoc.getFilename()); AI.line = SLoc.getLine(); AI.column = SLoc.getColumn(); - AI.function = internal_strdup(""); // Avoid printing ?? as function name. 
+ AI.function = nullptr; ReportErrorSummary(ErrorKind, AI, GetSanititizerToolName()); AI.Clear(); return; diff --git a/compiler-rt/lib/ubsan/ubsan_diag.h b/compiler-rt/lib/ubsan/ubsan_diag.h index b444e971b228..c836647c98f3 100644 --- a/compiler-rt/lib/ubsan/ubsan_diag.h +++ b/compiler-rt/lib/ubsan/ubsan_diag.h @@ -18,26 +18,6 @@ namespace __ubsan { -class SymbolizedStackHolder { - SymbolizedStack *Stack; - - void clear() { - if (Stack) - Stack->ClearAll(); - } - -public: - explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr) - : Stack(Stack) {} - ~SymbolizedStackHolder() { clear(); } - void reset(SymbolizedStack *S) { - if (Stack != S) - clear(); - Stack = S; - } - const SymbolizedStack *get() const { return Stack; } -}; - SymbolizedStack *getSymbolizedLocation(uptr PC); inline SymbolizedStack *getCallerLocation(uptr CallerPC) { diff --git a/libcxx/include/__algorithm/pop_heap.h b/libcxx/include/__algorithm/pop_heap.h index a93a9875f705..798a1d09934b 100644 --- a/libcxx/include/__algorithm/pop_heap.h +++ b/libcxx/include/__algorithm/pop_heap.h @@ -36,7 +36,8 @@ __pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len) { - _LIBCPP_ASSERT_UNCATEGORIZED(__len > 0, "The heap given to pop_heap must be non-empty"); + // Calling `pop_heap` on an empty range is undefined behavior, but in practice it will be a no-op. + _LIBCPP_ASSERT_PEDANTIC(__len > 0, "The heap given to pop_heap must be non-empty"); __comp_ref_type<_Compare> __comp_ref = __comp; diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h index 7f152e4dbd7f..42803e30631f 100644 --- a/libcxx/include/__algorithm/sift_down.h +++ b/libcxx/include/__algorithm/sift_down.h @@ -85,7 +85,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _RandomAccessIterator __floy _Compare&& __comp, typename iterator_traits<_RandomAccessIterator>::difference_type __len) { using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type; - _LIBCPP_ASSERT_UNCATEGORIZED(__len >= 2, "shouldn't be called unless __len >= 2"); + _LIBCPP_ASSERT_INTERNAL(__len >= 2, "shouldn't be called unless __len >= 2"); _RandomAccessIterator __hole = __first; _RandomAccessIterator __child_i = __first; diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 1b878c33c7a1..ac47489af0aa 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -533,7 +533,7 @@ __bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, using _Ops = _IterOps<_AlgPolicy>; typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type; typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - _LIBCPP_ASSERT_UNCATEGORIZED(__last - __first >= difference_type(3), ""); + _LIBCPP_ASSERT_INTERNAL(__last - __first >= difference_type(3), ""); const _RandomAccessIterator __begin = __first; // used for bounds checking, those are not moved around const _RandomAccessIterator __end = __last; (void)__end; // @@ -625,7 +625,7 @@ __partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIte using _Ops = _IterOps<_AlgPolicy>; typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type; - _LIBCPP_ASSERT_UNCATEGORIZED(__last - __first >= difference_type(3), ""); + 
_LIBCPP_ASSERT_INTERNAL(__last - __first >= difference_type(3), ""); const _RandomAccessIterator __begin = __first; // used for bounds checking, those are not moved around const _RandomAccessIterator __end = __last; (void)__end; // diff --git a/libcxx/include/__charconv/to_chars_base_10.h b/libcxx/include/__charconv/to_chars_base_10.h index 33c512e20f04..0dee351521f9 100644 --- a/libcxx/include/__charconv/to_chars_base_10.h +++ b/libcxx/include/__charconv/to_chars_base_10.h @@ -132,14 +132,14 @@ __base_10_u64(char* __buffer, uint64_t __value) noexcept { /// range that can be used. However the range is sufficient for /// \ref __base_10_u128. _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) noexcept { - _LIBCPP_ASSERT_UNCATEGORIZED(__exp >= __pow10_128_offset, "Index out of bounds"); + _LIBCPP_ASSERT_INTERNAL(__exp >= __pow10_128_offset, "Index out of bounds"); return __pow10_128[__exp - __pow10_128_offset]; } _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __base_10_u128(char* __buffer, __uint128_t __value) noexcept { - _LIBCPP_ASSERT_UNCATEGORIZED( - __value > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true."); + _LIBCPP_ASSERT_INTERNAL( + __value > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fails when this isn't true."); // Unlike the 64 to 32 bit case the 128 bit case the "upper half" can't be // stored in the "lower half". Instead we first need to handle the top most diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h index f50cc55a4c6d..40fbe334d8d5 100644 --- a/libcxx/include/__charconv/to_chars_integral.h +++ b/libcxx/include/__charconv/to_chars_integral.h @@ -246,7 +246,7 @@ __to_chars_integral(char* __first, char* __last, _Tp __value) { template <typename _Tp> _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_width(_Tp __value, unsigned __base) { - _LIBCPP_ASSERT_UNCATEGORIZED(__value >= 0, "The function requires a non-negative value."); + _LIBCPP_ASSERT_INTERNAL(__value >= 0, "The function requires a non-negative value."); unsigned __base_2 = __base * __base; unsigned __base_3 = __base_2 * __base; diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h index d3884b560dfd..b4907c3f7757 100644 --- a/libcxx/include/__charconv/traits.h +++ b/libcxx/include/__charconv/traits.h @@ -101,11 +101,11 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u /// zero is set to one. This means the first element of the lookup table is /// zero. static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_INTERNAL( __v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true."); // There's always a bit set in the upper 64-bits. auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12; - _LIBCPP_ASSERT_UNCATEGORIZED(__t >= __itoa::__pow10_128_offset, "Index out of bounds"); + _LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds"); // __t is adjusted since the lookup table misses the lower entries. 
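The __charconv/traits.h hunk above recategorizes the assertions inside __width(), which counts decimal digits with the classic `(bit_width * 1233) >> 12` approximation of log10 plus a table correction. A self-contained sketch of that idea for 64-bit values (the function name and table here are illustrative, not libc++ internals):

#include <bit>
#include <cassert>
#include <cstdint>

// digits10(v): number of decimal digits in v, for v > 0.
// (bit_width(v) * 1233) >> 12 approximates bit_width(v) * log10(2); comparing
// against the power-of-ten table corrects the cases where it is one too high.
int digits10(std::uint64_t v) {
  assert(v > 0 && "0 needs special casing, as the header above notes");
  static constexpr std::uint64_t pow10[] = {
      1ull, 10ull, 100ull, 1000ull, 10000ull, 100000ull, 1000000ull,
      10000000ull, 100000000ull, 1000000000ull, 10000000000ull,
      100000000000ull, 1000000000000ull, 10000000000000ull,
      100000000000000ull, 1000000000000000ull, 10000000000000000ull,
      100000000000000000ull, 1000000000000000000ull, 10000000000000000000ull};
  int t = static_cast<int>((std::bit_width(v) * 1233) >> 12); // ~ floor(log10(v)), maybe one high
  return t - (v < pow10[t]) + 1;
}

int main() { return digits10(18446744073709551615ull) == 20 ? 0 : 1; }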
return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1; } diff --git a/libcxx/include/__chrono/parser_std_format_spec.h b/libcxx/include/__chrono/parser_std_format_spec.h index 296be8794ec5..785bbae198e4 100644 --- a/libcxx/include/__chrono/parser_std_format_spec.h +++ b/libcxx/include/__chrono/parser_std_format_spec.h @@ -160,10 +160,9 @@ public: private: _LIBCPP_HIDE_FROM_ABI constexpr _ConstIterator __parse_chrono_specs(_ConstIterator __begin, _ConstIterator __end, __flags __flags) { - _LIBCPP_ASSERT_UNCATEGORIZED( - __begin != __end, - "When called with an empty input the function will cause " - "undefined behavior by evaluating data not in the input"); + _LIBCPP_ASSERT_INTERNAL(__begin != __end, + "When called with an empty input the function will cause " + "undefined behavior by evaluating data not in the input"); if (*__begin != _CharT('%') && *__begin != _CharT('}')) std::__throw_format_error("The format specifier expects a '%' or a '}'"); diff --git a/libcxx/include/__config b/libcxx/include/__config index 40e6da8bc03a..082c73e672c7 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -283,6 +283,9 @@ // - `_LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR` -- checks any operations that exchange nodes between containers to make sure // the containers have compatible allocators. // +// - `_LIBCPP_ASSERT_PEDANTIC` -- checks prerequisites which are imposed by the Standard, but violating which happens to +// be benign in our implementation. +// // - `_LIBCPP_ASSERT_INTERNAL` -- checks that internal invariants of the library hold. These assertions don't depend on // user input. // @@ -325,6 +328,7 @@ _LIBCPP_HARDENING_MODE_DEBUG // vulnerability. # define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) # define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) # define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) # define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) @@ -339,6 +343,7 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSERT(expression, message) // Disabled checks. 
# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) @@ -352,6 +357,7 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSERT(expression, message) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) @@ -365,6 +371,7 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSUME(expression) # define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) # define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) # define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) # define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) diff --git a/libcxx/include/__filesystem/directory_iterator.h b/libcxx/include/__filesystem/directory_iterator.h index 29bd8da6caa4..5287a4d8b055 100644 --- a/libcxx/include/__filesystem/directory_iterator.h +++ b/libcxx/include/__filesystem/directory_iterator.h @@ -73,7 +73,8 @@ public: _LIBCPP_HIDE_FROM_ABI ~directory_iterator() = default; _LIBCPP_HIDE_FROM_ABI const directory_entry& operator*() const { - _LIBCPP_ASSERT_UNCATEGORIZED(__imp_, "The end iterator cannot be dereferenced"); + // Note: this check duplicates a check in `__dereference()`. + _LIBCPP_ASSERT_NON_NULL(__imp_, "The end iterator cannot be dereferenced"); return __dereference(); } diff --git a/libcxx/include/__filesystem/path_iterator.h b/libcxx/include/__filesystem/path_iterator.h index 1a9aaf0e7d99..d2d65cd122ca 100644 --- a/libcxx/include/__filesystem/path_iterator.h +++ b/libcxx/include/__filesystem/path_iterator.h @@ -61,7 +61,7 @@ public: _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return &__stashed_elem_; } _LIBCPP_HIDE_FROM_ABI iterator& operator++() { - _LIBCPP_ASSERT_UNCATEGORIZED(__state_ != _Singular, "attempting to increment a singular iterator"); + _LIBCPP_ASSERT_NON_NULL(__state_ != _Singular, "attempting to increment a singular iterator"); _LIBCPP_ASSERT_UNCATEGORIZED(__state_ != _AtEnd, "attempting to increment the end iterator"); return __increment(); } @@ -73,7 +73,7 @@ public: } _LIBCPP_HIDE_FROM_ABI iterator& operator--() { - _LIBCPP_ASSERT_UNCATEGORIZED(__state_ != _Singular, "attempting to decrement a singular iterator"); + _LIBCPP_ASSERT_NON_NULL(__state_ != _Singular, "attempting to decrement a singular iterator"); _LIBCPP_ASSERT_UNCATEGORIZED( __entry_.data() != __path_ptr_->native().data(), "attempting to decrement the begin iterator"); return __decrement(); diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h index 7ee583d81394..8598f0a1c039 100644 --- a/libcxx/include/__format/buffer.h +++ b/libcxx/include/__format/buffer.h @@ -115,7 +115,7 @@ public: // The output doesn't fit in the internal buffer. // Copy the data in "__capacity_" sized chunks. 
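The __config hunks above thread the new _LIBCPP_ASSERT_PEDANTIC category through every hardening mode: modes that enable the category expand it to a real check, the others collapse it into an assumption. A stripped-down sketch of that category-to-mode dispatch, with invented macro names so it is not mistaken for the actual libc++ wiring:

#include <cassert>

// Toy version of the dispatch.  MY_HARDENING_LEVEL stands in for the hardening
// mode; 2 ("debug-like") turns pedantic checks into asserts, anything lower
// turns them into no-ops that an implementation may treat as assumptions.
#define MY_HARDENING_LEVEL 2

#if MY_HARDENING_LEVEL >= 2
#  define MY_ASSERT_PEDANTIC(expr, msg) assert((expr) && msg)
#else
#  define MY_ASSERT_PEDANTIC(expr, msg) ((void)0)
#endif

float clamp_load_factor(float requested, float current) {
  // Mirrors the max_load_factor() precondition annotated later in this diff:
  // benign to violate, so it is only diagnosed in the stricter modes.
  MY_ASSERT_PEDANTIC(requested > 0, "load factor must be positive");
  return requested > current ? requested : current;
}

int main() { return clamp_load_factor(2.0f, 1.0f) == 2.0f ? 0 : 1; }

The point of the extra category is exactly this split: code that is undefined behavior on paper but harmless in this implementation only fires a diagnostic in the modes that opt into pedantic checking.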
- _LIBCPP_ASSERT_UNCATEGORIZED(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); + _LIBCPP_ASSERT_INTERNAL(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); const _InCharT* __first = __str.data(); do { size_t __chunk = std::min(__n, __capacity_); @@ -134,7 +134,7 @@ public: class _UnaryOperation, __fmt_char_type _InCharT = typename iterator_traits<_Iterator>::value_type> _LIBCPP_HIDE_FROM_ABI void __transform(_Iterator __first, _Iterator __last, _UnaryOperation __operation) { - _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "not a valid range"); + _LIBCPP_ASSERT_INTERNAL(__first <= __last, "not a valid range"); size_t __n = static_cast<size_t>(__last - __first); __flush_on_overflow(__n); @@ -146,7 +146,7 @@ public: // The output doesn't fit in the internal buffer. // Transform the data in "__capacity_" sized chunks. - _LIBCPP_ASSERT_UNCATEGORIZED(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); + _LIBCPP_ASSERT_INTERNAL(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); do { size_t __chunk = std::min(__n, __capacity_); std::transform(__first, __first + __chunk, std::addressof(__ptr_[__size_]), __operation); @@ -168,7 +168,7 @@ public: // The output doesn't fit in the internal buffer. // Fill the buffer in "__capacity_" sized chunks. - _LIBCPP_ASSERT_UNCATEGORIZED(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); + _LIBCPP_ASSERT_INTERNAL(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); do { size_t __chunk = std::min(__n, __capacity_); std::fill_n(std::addressof(__ptr_[__size_]), __chunk, __value); @@ -596,7 +596,7 @@ public: class _UnaryOperation, __fmt_char_type _InCharT = typename iterator_traits<_Iterator>::value_type> _LIBCPP_HIDE_FROM_ABI void __transform(_Iterator __first, _Iterator __last, _UnaryOperation __operation) { - _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "not a valid range"); + _LIBCPP_ASSERT_INTERNAL(__first <= __last, "not a valid range"); size_t __n = static_cast<size_t>(__last - __first); if (__size_ + __n >= __capacity_) @@ -623,7 +623,7 @@ private: _LIBCPP_HIDE_FROM_ABI void __grow_buffer() { __grow_buffer(__capacity_ * 1.6); } _LIBCPP_HIDE_FROM_ABI void __grow_buffer(size_t __capacity) { - _LIBCPP_ASSERT_UNCATEGORIZED(__capacity > __capacity_, "the buffer must grow"); + _LIBCPP_ASSERT_INTERNAL(__capacity > __capacity_, "the buffer must grow"); auto __result = std::__allocate_at_least(__alloc_, __capacity); auto __guard = std::__make_exception_guard([&] { allocator_traits<_Alloc>::deallocate(__alloc_, __result.ptr, __result.count); diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index 280c91082417..10fca15d5a7a 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -83,7 +83,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __use_packed_format_arg_store(size_t __size } _LIBCPP_HIDE_FROM_ABI constexpr __arg_t __get_packed_type(uint64_t __types, size_t __id) { - _LIBCPP_ASSERT_UNCATEGORIZED(__id <= __packed_types_max, ""); + _LIBCPP_ASSERT_INTERNAL(__id <= __packed_types_max, ""); if (__id > 0) __types >>= __id * __packed_arg_t_bits; diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h index c481992d2d71..066cd369eb89 100644 --- a/libcxx/include/__format/format_arg_store.h +++ b/libcxx/include/__format/format_arg_store.h @@ -228,15 +228,15 @@ _LIBCPP_HIDE_FROM_ABI void __store_basic_format_arg(basic_format_arg<_Context>* 
([&] { *__data++ = __format::__create_format_arg<_Context>(__args); }(), ...); } -template <class _Context, size_t N> +template <class _Context, size_t _Np> struct __packed_format_arg_store { - __basic_format_arg_value<_Context> __values_[N]; + __basic_format_arg_value<_Context> __values_[_Np]; uint64_t __types_ = 0; }; -template <class _Context, size_t N> +template <class _Context, size_t _Np> struct __unpacked_format_arg_store { - basic_format_arg<_Context> __args_[N]; + basic_format_arg<_Context> __args_[_Np]; }; } // namespace __format diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h index 3c8ae95f55fa..1c479501b675 100644 --- a/libcxx/include/__format/formatter_bool.h +++ b/libcxx/include/__format/formatter_bool.h @@ -62,7 +62,7 @@ public: static_cast<unsigned>(__value), __ctx, __parser_.__get_parsed_std_specifications(__ctx)); default: - _LIBCPP_ASSERT_UNCATEGORIZED(false, "The parse function should have validated the type"); + _LIBCPP_ASSERT_INTERNAL(false, "The parse function should have validated the type"); __libcpp_unreachable(); } } diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h index 33cc2a4ed661..6802a8b7bd4c 100644 --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -57,21 +57,21 @@ namespace __formatter { template <floating_point _Tp> _LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value) { to_chars_result __r = std::to_chars(__first, __last, __value); - _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); + _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); return __r.ptr; } template <floating_point _Tp> _LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value, chars_format __fmt) { to_chars_result __r = std::to_chars(__first, __last, __value, __fmt); - _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); + _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); return __r.ptr; } template <floating_point _Tp> _LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value, chars_format __fmt, int __precision) { to_chars_result __r = std::to_chars(__first, __last, __value, __fmt, __precision); - _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); + _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); return __r.ptr; } @@ -252,10 +252,10 @@ __format_buffer_default(const __float_buffer<_Fp>& __buffer, _Tp __value, char* __result.__radix_point = __result.__last; // clang-format off - _LIBCPP_ASSERT_UNCATEGORIZED((__result.__integral != __result.__last) && - (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && - (__result.__exponent == __result.__last || *__result.__exponent == 'e'), - "Post-condition failure."); + _LIBCPP_ASSERT_INTERNAL((__result.__integral != __result.__last) && + (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && + (__result.__exponent == __result.__last || *__result.__exponent == 'e'), + "Post-condition failure."); // clang-format on return __result; @@ -304,10 +304,10 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_hexadecimal_lower_case( } // clang-format off - _LIBCPP_ASSERT_UNCATEGORIZED((__result.__integral != __result.__last) && - (__result.__radix_point == __result.__last || *__result.__radix_point == '.') 
&& - (__result.__exponent != __result.__last && *__result.__exponent == 'p'), - "Post-condition failure."); + _LIBCPP_ASSERT_INTERNAL((__result.__integral != __result.__last) && + (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && + (__result.__exponent != __result.__last && *__result.__exponent == 'p'), + "Post-condition failure."); // clang-format on return __result; @@ -332,7 +332,7 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_lower_case( __formatter::__to_buffer(__integral, __buffer.end(), __value, chars_format::scientific, __precision); char* __first = __integral + 1; - _LIBCPP_ASSERT_UNCATEGORIZED(__first != __result.__last, "No exponent present"); + _LIBCPP_ASSERT_INTERNAL(__first != __result.__last, "No exponent present"); if (*__first == '.') { __result.__radix_point = __first; __result.__exponent = __formatter::__find_exponent(__first + 1, __result.__last); @@ -342,10 +342,10 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_lower_case( } // clang-format off - _LIBCPP_ASSERT_UNCATEGORIZED((__result.__integral != __result.__last) && - (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && - (__result.__exponent != __result.__last && *__result.__exponent == 'e'), - "Post-condition failure."); + _LIBCPP_ASSERT_INTERNAL((__result.__integral != __result.__last) && + (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && + (__result.__exponent != __result.__last && *__result.__exponent == 'e'), + "Post-condition failure."); // clang-format on return __result; } @@ -374,10 +374,10 @@ __format_buffer_fixed(const __float_buffer<_Fp>& __buffer, _Tp __value, int __pr __result.__exponent = __result.__last; // clang-format off - _LIBCPP_ASSERT_UNCATEGORIZED((__result.__integral != __result.__last) && - (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && - (__result.__exponent == __result.__last), - "Post-condition failure."); + _LIBCPP_ASSERT_INTERNAL((__result.__integral != __result.__last) && + (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && + (__result.__exponent == __result.__last), + "Post-condition failure."); // clang-format on return __result; } @@ -410,10 +410,10 @@ __format_buffer_general_lower_case(__float_buffer<_Fp>& __buffer, _Tp __value, i } // clang-format off - _LIBCPP_ASSERT_UNCATEGORIZED((__result.__integral != __result.__last) && - (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && - (__result.__exponent == __result.__last || *__result.__exponent == 'e'), - "Post-condition failure."); + _LIBCPP_ASSERT_INTERNAL((__result.__integral != __result.__last) && + (__result.__radix_point == __result.__last || *__result.__radix_point == '.') && + (__result.__exponent == __result.__last || *__result.__exponent == 'e'), + "Post-condition failure."); // clang-format on return __result; @@ -485,7 +485,7 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer( return __formatter::__format_buffer_general_upper_case(__buffer, __value, __buffer.__precision(), __first); default: - _LIBCPP_ASSERT_UNCATEGORIZED(false, "The parser should have validated the type"); + _LIBCPP_ASSERT_INTERNAL(false, "The parser should have validated the type"); __libcpp_unreachable(); } } @@ -620,9 +620,8 @@ _LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros( size_t __size, const _CharT* __exponent, size_t __num_trailing_zeros) -> decltype(__out_it) { - _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, 
"Not a valid range"); - _LIBCPP_ASSERT_UNCATEGORIZED( - __num_trailing_zeros > 0, "The overload not writing trailing zeros should have been used"); + _LIBCPP_ASSERT_INTERNAL(__first <= __last, "Not a valid range"); + _LIBCPP_ASSERT_INTERNAL(__num_trailing_zeros > 0, "The overload not writing trailing zeros should have been used"); __padding_size_result __padding = __formatter::__padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_); diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h index ca66e26ede10..e0217a240027 100644 --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -90,10 +90,8 @@ _LIBCPP_HIDE_FROM_ABI inline _Iterator __insert_sign(_Iterator __buf, bool __neg * regardless whether the @c std::numpunct's type is @c char or @c wchar_t. */ _LIBCPP_HIDE_FROM_ABI inline string __determine_grouping(ptrdiff_t __size, const string& __grouping) { - _LIBCPP_ASSERT_UNCATEGORIZED( - !__grouping.empty() && __size > __grouping[0], - "The slow grouping formatting is used while there will be no " - "separators written"); + _LIBCPP_ASSERT_INTERNAL(!__grouping.empty() && __size > __grouping[0], + "The slow grouping formatting is used while there will be no separators written"); string __r; auto __end = __grouping.end() - 1; auto __ptr = __grouping.begin(); @@ -161,7 +159,7 @@ _LIBCPP_HIDE_FROM_ABI _Iterator __to_buffer(_Iterator __first, _Iterator __last, // TODO FMT Evaluate code overhead due to not calling the internal function // directly. (Should be zero overhead.) to_chars_result __r = std::to_chars(std::to_address(__first), std::to_address(__last), __value, __base); - _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); + _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); auto __diff = __r.ptr - std::to_address(__first); return __first + __diff; } @@ -248,10 +246,8 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators( auto __r = __grouping.rbegin(); auto __e = __grouping.rend() - 1; - _LIBCPP_ASSERT_UNCATEGORIZED( - __r != __e, - "The slow grouping formatting is used while " - "there will be no separators written."); + _LIBCPP_ASSERT_INTERNAL( + __r != __e, "The slow grouping formatting is used while there will be no separators written."); // The output is divided in small groups of numbers to write: // - A group before the first separator. // - A separator and a group, repeated for the number of separators. 
@@ -380,7 +376,7 @@ __format_integer(_Tp __value, return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0X", 16); } default: - _LIBCPP_ASSERT_UNCATEGORIZED(false, "The parse function should have validated the type"); + _LIBCPP_ASSERT_INTERNAL(false, "The parse function should have validated the type"); __libcpp_unreachable(); } } diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h index 31e06425703a..eebe880d69ef 100644 --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -66,8 +66,8 @@ struct _LIBCPP_EXPORTED_FROM_ABI __padding_size_result { _LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result __padding_size(size_t __size, size_t __width, __format_spec::__alignment __align) { - _LIBCPP_ASSERT_UNCATEGORIZED(__width > __size, "don't call this function when no padding is required"); - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_INTERNAL(__width > __size, "don't call this function when no padding is required"); + _LIBCPP_ASSERT_INTERNAL( __align != __format_spec::__alignment::__zero_padding, "the caller should have handled the zero-padding"); size_t __fill = __width - __size; @@ -296,7 +296,7 @@ _LIBCPP_HIDE_FROM_ABI auto __write_string_no_precision( basic_string_view<_CharT> __str, output_iterator<const _CharT&> auto __out_it, __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { - _LIBCPP_ASSERT_UNCATEGORIZED(!__specs.__has_precision(), "use __write_string"); + _LIBCPP_ASSERT_INTERNAL(!__specs.__has_precision(), "use __write_string"); // No padding -> copy the string if (!__specs.__has_width()) diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index 4ba5617a49c8..d1ccfb9b5f7d 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -64,10 +64,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter<const _CharT*, _CharT> : public __formatte template <class _FormatContext> _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator format(const _CharT* __str, _FormatContext& __ctx) const { - _LIBCPP_ASSERT_UNCATEGORIZED( - __str, - "The basic_format_arg constructor should have " - "prevented an invalid pointer."); + _LIBCPP_ASSERT_INTERNAL(__str, "The basic_format_arg constructor should have prevented an invalid pointer."); __format_spec::__parsed_specifications<_CharT> __specs = _Base::__parser_.__get_parsed_std_specifications(__ctx); # if _LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h index e38729db965c..cf8af87b2128 100644 --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -733,10 +733,9 @@ private: __format::__parse_number_result __r = __format::__parse_number(__begin, __end); __width_ = __r.__value; - _LIBCPP_ASSERT_UNCATEGORIZED( - __width_ != 0, - "A zero value isn't allowed and should be impossible, " - "due to validations in this function"); + _LIBCPP_ASSERT_INTERNAL(__width_ != 0, + "A zero value isn't allowed and should be impossible, " + "due to validations in this function"); __begin = __r.__last; return true; } diff --git a/libcxx/include/__format/range_formatter.h b/libcxx/include/__format/range_formatter.h index d13278009fcf..691563074349 100644 --- a/libcxx/include/__format/range_formatter.h +++ b/libcxx/include/__format/range_formatter.h @@ -246,9 +246,8 @@ private: 
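The formatter_integral and formatter_output hunks above sit behind the integer presentation types of std::format ('b', 'o', 'd', 'x', 'X') and the padding logic. A short example of the observable behavior those code paths implement (standard <format>, C++20):

#include <format>
#include <iostream>

int main() {
  // Each presentation type below lands in a different branch of
  // __format_integer's switch; '#' adds the base prefix handled there.
  std::cout << std::format("{:#b} {:#o} {:d} {:#x} {:#X}\n", 42, 42, 42, 42, 42);
  // prints: 0b101010 052 42 0x2a 0X2A
}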
__parse_empty_range_underlying_spec(_ParseContext& __ctx, typename _ParseContext::iterator __begin) { __ctx.advance_to(__begin); [[maybe_unused]] typename _ParseContext::iterator __result = __underlying_.parse(__ctx); - _LIBCPP_ASSERT_UNCATEGORIZED( - __result == __begin, - "the underlying's parse function should not advance the input beyond the end of the input"); + _LIBCPP_ASSERT_INTERNAL(__result == __begin, + "the underlying's parse function should not advance the input beyond the end of the input"); return __begin; } diff --git a/libcxx/include/__format/unicode.h b/libcxx/include/__format/unicode.h index 8e1e7bb192a0..40067ca3448b 100644 --- a/libcxx/include/__format/unicode.h +++ b/libcxx/include/__format/unicode.h @@ -153,7 +153,7 @@ public: // - The parser always needs to consume these code units // - The code is optimized for well-formed UTF-8 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept { - _LIBCPP_ASSERT_UNCATEGORIZED(__first_ != __last_, "can't move beyond the end of input"); + _LIBCPP_ASSERT_INTERNAL(__first_ != __last_, "can't move beyond the end of input"); // Based on the number of leading 1 bits the number of code units in the // code point can be determined. See @@ -259,7 +259,7 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept { - _LIBCPP_ASSERT_UNCATEGORIZED(__first_ != __last_, "can't move beyond the end of input"); + _LIBCPP_ASSERT_INTERNAL(__first_ != __last_, "can't move beyond the end of input"); char32_t __value = static_cast<char32_t>(*__first_++); if constexpr (sizeof(wchar_t) == 2) { @@ -305,8 +305,8 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __at_extended_grapheme_cluster_break( // *** Break at the start and end of text, unless the text is empty. *** - _LIBCPP_ASSERT_UNCATEGORIZED(__prev != __property::__sot, "should be handled in the constructor"); // GB1 - _LIBCPP_ASSERT_UNCATEGORIZED(__prev != __property::__eot, "should be handled by our caller"); // GB2 + _LIBCPP_ASSERT_INTERNAL(__prev != __property::__sot, "should be handled in the constructor"); // GB1 + _LIBCPP_ASSERT_INTERNAL(__prev != __property::__eot, "should be handled by our caller"); // GB2 // *** Do not break between a CR and LF. Otherwise, break before and after controls. 
*** if (__prev == __property::__CR && __next == __property::__LF) // GB3 @@ -401,8 +401,8 @@ public: }; _LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() { - _LIBCPP_ASSERT_UNCATEGORIZED(__next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot, - "can't move beyond the end of input"); + _LIBCPP_ASSERT_INTERNAL(__next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot, + "can't move beyond the end of input"); char32_t __code_point = __next_code_point_; if (!__code_point_view_.__at_end()) @@ -459,7 +459,7 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __position() const noexcept { return __first_; } [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __consume_result __consume() noexcept { - _LIBCPP_ASSERT_UNCATEGORIZED(__first_ != __last_, "can't move beyond the end of input"); + _LIBCPP_ASSERT_INTERNAL(__first_ != __last_, "can't move beyond the end of input"); return {static_cast<char32_t>(*__first_++)}; } diff --git a/libcxx/include/__format/write_escaped.h b/libcxx/include/__format/write_escaped.h index 15141eebc029..ec1283a173e9 100644 --- a/libcxx/include/__format/write_escaped.h +++ b/libcxx/include/__format/write_escaped.h @@ -71,7 +71,7 @@ __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _ char __buffer[8]; to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); - _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small"); + _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); __str += _CharT('}'); diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 3cee48ef8538..4ca49fe42606 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -915,7 +915,10 @@ public: return __bc != 0 ? (float)size() / __bc : 0.f; } _LIBCPP_HIDE_FROM_ABI void max_load_factor(float __mlf) _NOEXCEPT { - _LIBCPP_ASSERT_UNCATEGORIZED(__mlf > 0, "unordered container::max_load_factor(lf) called with lf <= 0"); + // While passing a non-positive load factor is undefined behavior, in practice the result will be benign (the + // call will be equivalent to `max_load_factor(load_factor())`, which is also the case for passing a valid value + // less than the current `load_factor`). + _LIBCPP_ASSERT_PEDANTIC(__mlf > 0, "unordered container::max_load_factor(lf) called with lf <= 0"); max_load_factor() = std::max(__mlf, load_factor()); } diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h index 64c8d249f78f..73473f899eac 100644 --- a/libcxx/include/__iterator/advance.h +++ b/libcxx/include/__iterator/advance.h @@ -65,8 +65,9 @@ template < class _InputIter, _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 void advance(_InputIter& __i, _Distance __orig_n) { typedef typename iterator_traits<_InputIter>::difference_type _Difference; _Difference __n = static_cast<_Difference>(std::__convert_to_integral(__orig_n)); - _LIBCPP_ASSERT_UNCATEGORIZED(__n >= 0 || __has_bidirectional_iterator_category<_InputIter>::value, - "Attempt to advance(it, n) with negative n on a non-bidirectional iterator"); + // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. 
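The __hash_table hunk above explains why a bad max_load_factor() argument is only a pedantic violation: libc++ clamps the hint against the current load factor, so the call stays benign. A small example of that clamping with a standard unordered_map:

#include <iostream>
#include <unordered_map>

int main() {
  std::unordered_map<int, int> m{{1, 1}, {2, 2}, {3, 3}};
  std::cout << "load_factor:     " << m.load_factor() << '\n';
  // With the libc++ implementation shown above, a hint smaller than the
  // current load factor is clamped: max_load_factor() = max(hint, load_factor()).
  // Other implementations may treat the hint differently.
  m.max_load_factor(0.01f);
  std::cout << "max_load_factor: " << m.max_load_factor() << '\n';
}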
+ _LIBCPP_ASSERT_PEDANTIC(__n >= 0 || __has_bidirectional_iterator_category<_InputIter>::value, + "Attempt to advance(it, n) with negative n on a non-bidirectional iterator"); std::__advance(__i, __n, typename iterator_traits<_InputIter>::iterator_category()); } @@ -99,7 +100,8 @@ public: // Preconditions: If `I` does not model `bidirectional_iterator`, `n` is not negative. template <input_or_output_iterator _Ip> _LIBCPP_HIDE_FROM_ABI constexpr void operator()(_Ip& __i, iter_difference_t<_Ip> __n) const { - _LIBCPP_ASSERT_UNCATEGORIZED( + // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. + _LIBCPP_ASSERT_PEDANTIC( __n >= 0 || bidirectional_iterator<_Ip>, "If `n < 0`, then `bidirectional_iterator<I>` must be true."); // If `I` models `random_access_iterator`, equivalent to `i += n`. @@ -149,8 +151,9 @@ public: template <input_or_output_iterator _Ip, sentinel_for<_Ip> _Sp> _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip& __i, iter_difference_t<_Ip> __n, _Sp __bound_sentinel) const { - _LIBCPP_ASSERT_UNCATEGORIZED((__n >= 0) || (bidirectional_iterator<_Ip> && same_as<_Ip, _Sp>), - "If `n < 0`, then `bidirectional_iterator<I> && same_as<I, S>` must be true."); + // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. + _LIBCPP_ASSERT_PEDANTIC((__n >= 0) || (bidirectional_iterator<_Ip> && same_as<_Ip, _Sp>), + "If `n < 0`, then `bidirectional_iterator<I> && same_as<I, S>` must be true."); // If `S` and `I` model `sized_sentinel_for<S, I>`: if constexpr (sized_sentinel_for<_Sp, _Ip>) { // If |n| >= |bound_sentinel - i|, equivalent to `ranges::advance(i, bound_sentinel)`. diff --git a/libcxx/include/__iterator/next.h b/libcxx/include/__iterator/next.h index da60aacfd08d..21d3688ad9eb 100644 --- a/libcxx/include/__iterator/next.h +++ b/libcxx/include/__iterator/next.h @@ -27,8 +27,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIter, __enable_if_t<__has_input_iterator_category<_InputIter>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter next(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n = 1) { - _LIBCPP_ASSERT_UNCATEGORIZED(__n >= 0 || __has_bidirectional_iterator_category<_InputIter>::value, - "Attempt to next(it, n) with negative n on a non-bidirectional iterator"); + // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. + // Note that this check duplicates the similar check in `std::advance`. 
+ _LIBCPP_ASSERT_PEDANTIC(__n >= 0 || __has_bidirectional_iterator_category<_InputIter>::value, + "Attempt to next(it, n) with negative n on a non-bidirectional iterator"); std::advance(__x, __n); return __x; diff --git a/libcxx/include/__iterator/prev.h b/libcxx/include/__iterator/prev.h index 1651942acea9..2f0e6a088edb 100644 --- a/libcxx/include/__iterator/prev.h +++ b/libcxx/include/__iterator/prev.h @@ -27,8 +27,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIter, __enable_if_t<__has_input_iterator_category<_InputIter>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter prev(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n = 1) { - _LIBCPP_ASSERT_UNCATEGORIZED(__n <= 0 || __has_bidirectional_iterator_category<_InputIter>::value, - "Attempt to prev(it, n) with a positive n on a non-bidirectional iterator"); + // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. + // Note that this check duplicates the similar check in `std::advance`. + _LIBCPP_ASSERT_PEDANTIC(__n <= 0 || __has_bidirectional_iterator_category<_InputIter>::value, + "Attempt to prev(it, n) with a positive n on a non-bidirectional iterator"); std::advance(__x, -__n); return __x; } diff --git a/libcxx/include/__random/negative_binomial_distribution.h b/libcxx/include/__random/negative_binomial_distribution.h index 580c74d46440..eed4f511e871 100644 --- a/libcxx/include/__random/negative_binomial_distribution.h +++ b/libcxx/include/__random/negative_binomial_distribution.h @@ -113,10 +113,9 @@ _IntType negative_binomial_distribution<_IntType>::operator()(_URNG& __urng, con else ++__f; } - _LIBCPP_ASSERT_UNCATEGORIZED( - __f >= 0, - "std::negative_binomial_distribution should never produce negative values. " - "This is almost certainly a signed integer overflow issue on __f."); + _LIBCPP_ASSERT_INTERNAL(__f >= 0, + "std::negative_binomial_distribution should never produce negative values. " + "This is almost certainly a signed integer overflow issue on __f."); return __f; } return poisson_distribution<result_type>(gamma_distribution<double>(__k, (1 - __p) / __p)(__urng))(__urng); diff --git a/libcxx/include/__ranges/chunk_by_view.h b/libcxx/include/__ranges/chunk_by_view.h index 3ecc018cac9d..c5b3240a7d0b 100644 --- a/libcxx/include/__ranges/chunk_by_view.h +++ b/libcxx/include/__ranges/chunk_by_view.h @@ -66,7 +66,8 @@ class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG chunk_by_view class __iterator; _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> __find_next(iterator_t<_View> __current) { - _LIBCPP_ASSERT_UNCATEGORIZED( + // Note: this duplicates a check in `optional` but provides a better error message. 
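The advance/next/prev hunks above demote the "negative distance on a non-bidirectional iterator" checks to pedantic, since such a call is a no-op in the current implementation. An example of the preconditions as seen from user code:

#include <cassert>
#include <forward_list>
#include <iterator>
#include <list>

int main() {
  std::list<int> bidi{1, 2, 3, 4};
  std::forward_list<int> fwd{1, 2, 3, 4};

  // Fine: list iterators are bidirectional, so a negative distance is allowed.
  auto it = std::next(bidi.begin(), 3);
  it = std::prev(it, 2);
  assert(*it == 2);

  // For forward_list only non-negative distances satisfy the precondition.
  auto fit = std::next(fwd.begin(), 2);
  assert(*fit == 3);
  // std::prev(fit, 1) would violate the precondition guarded by
  // _LIBCPP_ASSERT_PEDANTIC above and is deliberately left out.
}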
+ _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), "Trying to call __find_next() on a chunk_by_view that does not have a valid predicate."); auto __reversed_pred = [this]<class _Tp, class _Up>(_Tp&& __x, _Up&& __y) -> bool { return !std::invoke(*__pred_, std::forward<_Tp>(__x), std::forward<_Up>(__y)); @@ -78,9 +79,10 @@ class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG chunk_by_view _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_View> __find_prev(iterator_t<_View> __current) requires bidirectional_range<_View> { - _LIBCPP_ASSERT_UNCATEGORIZED( - __current != ranges::begin(__base_), "Trying to call __find_prev() on a begin iterator."); - _LIBCPP_ASSERT_UNCATEGORIZED( + // Attempting to decrement a begin iterator is a no-op (`__find_prev` would return the same argument given to it). + _LIBCPP_ASSERT_PEDANTIC(__current != ranges::begin(__base_), "Trying to call __find_prev() on a begin iterator."); + // Note: this duplicates a check in `optional` but provides a better error message. + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), "Trying to call __find_prev() on a chunk_by_view that does not have a valid predicate."); auto __first = ranges::begin(__base_); @@ -110,7 +112,8 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; } _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() { - _LIBCPP_ASSERT_UNCATEGORIZED( + // Note: this duplicates a check in `optional` but provides a better error message. + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), "Trying to call begin() on a chunk_by_view that does not have a valid predicate."); auto __first = ranges::begin(__base_); @@ -154,12 +157,15 @@ public: _LIBCPP_HIDE_FROM_ABI __iterator() = default; _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const { - _LIBCPP_ASSERT_UNCATEGORIZED(__current_ != __next_, "Trying to dereference past-the-end chunk_by_view iterator."); + // If the iterator is at end, this would return an empty range which can be checked by the calling code and doesn't + // necessarily lead to a bad access. + _LIBCPP_ASSERT_PEDANTIC(__current_ != __next_, "Trying to dereference past-the-end chunk_by_view iterator."); return {__current_, __next_}; } _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() { - _LIBCPP_ASSERT_UNCATEGORIZED(__current_ != __next_, "Trying to increment past end chunk_by_view iterator."); + // Attempting to increment an end iterator is a no-op (`__find_next` would return the same argument given to it). + _LIBCPP_ASSERT_PEDANTIC(__current_ != __next_, "Trying to increment past end chunk_by_view iterator."); __current_ = __next_; __next_ = __parent_->__find_next(__current_); return *this; diff --git a/libcxx/include/__ranges/drop_while_view.h b/libcxx/include/__ranges/drop_while_view.h index eb3783eb42f1..b367f735c141 100644 --- a/libcxx/include/__ranges/drop_while_view.h +++ b/libcxx/include/__ranges/drop_while_view.h @@ -66,7 +66,8 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr const _Pred& pred() const { return *__pred_; } _LIBCPP_HIDE_FROM_ABI constexpr auto begin() { - _LIBCPP_ASSERT_UNCATEGORIZED( + // Note: this duplicates a check in `optional` but provides a better error message. 
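The chunk_by_view, drop_while_view and filter_view hunks around here guard the stored predicate (kept in an optional-like box that a failed assignment can leave empty) and relax the end-iterator checks to pedantic. A usage example of the views whose internals are being annotated (standard <ranges>; views::chunk_by needs C++23):

#include <functional>
#include <iostream>
#include <ranges>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 2, 3, 1, 5, 6};

  // chunk_by splits the range whenever the predicate over adjacent elements
  // fails; __find_next/__find_prev above implement exactly this scan.
  for (auto chunk : v | std::views::chunk_by(std::ranges::less_equal{})) {
    for (int x : chunk)
      std::cout << x << ' ';
    std::cout << "| ";
  }
  std::cout << '\n';   // prints: 1 2 2 3 | 1 5 6 |

  // drop_while's begin() uses the stored predicate checked above.
  for (int x : v | std::views::drop_while([](int x) { return x < 3; }))
    std::cout << x << ' ';
  std::cout << '\n';   // prints: 3 1 5 6
}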
+ _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), "drop_while_view needs to have a non-empty predicate before calling begin() -- did a previous " "assignment to this drop_while_view fail?"); diff --git a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index 868ad128e894..ecb78eee3810 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -83,7 +83,8 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr _Pred const& pred() const { return *__pred_; } _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() { - _LIBCPP_ASSERT_UNCATEGORIZED( + // Note: this duplicates a check in `optional` but provides a better error message. + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __pred_.__has_value(), "Trying to call begin() on a filter_view that does not have a valid predicate."); if constexpr (_UseCache) { if (!__cached_begin_.__has_value()) { diff --git a/libcxx/include/__thread/thread.h b/libcxx/include/__thread/thread.h index f3300752ac9e..463bbd677255 100644 --- a/libcxx/include/__thread/thread.h +++ b/libcxx/include/__thread/thread.h @@ -104,7 +104,7 @@ __thread_specific_ptr<_Tp>::~__thread_specific_ptr() { template <class _Tp> void __thread_specific_ptr<_Tp>::set_pointer(pointer __p) { - _LIBCPP_ASSERT_UNCATEGORIZED(get() == nullptr, "Attempting to overwrite thread local data"); + _LIBCPP_ASSERT_INTERNAL(get() == nullptr, "Attempting to overwrite thread local data"); std::__libcpp_tls_set(__key_, __p); } diff --git a/libcxx/include/__utility/exception_guard.h b/libcxx/include/__utility/exception_guard.h index 389fca6c7101..8d90dfd5f190 100644 --- a/libcxx/include/__utility/exception_guard.h +++ b/libcxx/include/__utility/exception_guard.h @@ -115,7 +115,7 @@ struct __exception_guard_noexceptions { } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NODEBUG ~__exception_guard_noexceptions() { - _LIBCPP_ASSERT_UNCATEGORIZED(__completed_, "__exception_guard not completed with exceptions disabled"); + _LIBCPP_ASSERT_INTERNAL(__completed_, "__exception_guard not completed with exceptions disabled"); } private: diff --git a/libcxx/include/__utility/unreachable.h b/libcxx/include/__utility/unreachable.h index 49334decc8f6..d833f74c2e4f 100644 --- a/libcxx/include/__utility/unreachable.h +++ b/libcxx/include/__utility/unreachable.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void __libcpp_unreachable() { - _LIBCPP_ASSERT_UNCATEGORIZED(false, "std::unreachable() was reached"); + _LIBCPP_ASSERT_INTERNAL(false, "std::unreachable() was reached"); __builtin_unreachable(); } diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 7a4e15b55d56..203cc6dfb4b1 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -73,6 +73,7 @@ public: typedef typename traits_type::int_type int_type; typedef typename traits_type::pos_type pos_type; typedef typename traits_type::off_type off_type; + using native_handle_type = typename basic_filebuf<charT, traits>::native_handle_type; // Since C++26 basic_ifstream(); explicit basic_ifstream(const char* s, ios_base::openmode mode = ios_base::in); @@ -85,6 +86,7 @@ public: void swap(basic_ifstream& rhs); basic_filebuf<char_type, traits_type>* rdbuf() const; + native_handle_type native_handle() const noexcept; // Since C++26 bool is_open() const; void open(const char* s, ios_base::openmode mode = ios_base::in); void open(const string& s, ios_base::openmode mode = ios_base::in); @@ -110,6 +112,7 @@ public: typedef typename 
traits_type::int_type int_type; typedef typename traits_type::pos_type pos_type; typedef typename traits_type::off_type off_type; + using native_handle_type = typename basic_filebuf<charT, traits>::native_handle_type; // Since C++26 basic_ofstream(); explicit basic_ofstream(const char* s, ios_base::openmode mode = ios_base::out); @@ -122,6 +125,8 @@ public: void swap(basic_ofstream& rhs); basic_filebuf<char_type, traits_type>* rdbuf() const; + native_handle_type native_handle() const noexcept; // Since C++26 + bool is_open() const; void open(const char* s, ios_base::openmode mode = ios_base::out); void open(const string& s, ios_base::openmode mode = ios_base::out); @@ -148,6 +153,7 @@ public: typedef typename traits_type::int_type int_type; typedef typename traits_type::pos_type pos_type; typedef typename traits_type::off_type off_type; + using native_handle_type = typename basic_filebuf<charT, traits>::native_handle_type; // Since C++26 basic_fstream(); explicit basic_fstream(const char* s, ios_base::openmode mode = ios_base::in|ios_base::out); @@ -160,6 +166,7 @@ public: void swap(basic_fstream& rhs); basic_filebuf<char_type, traits_type>* rdbuf() const; + native_handle_type native_handle() const noexcept; // Since C++26 bool is_open() const; void open(const char* s, ios_base::openmode mode = ios_base::in|ios_base::out); void open(const string& s, ios_base::openmode mode = ios_base::in|ios_base::out); @@ -210,6 +217,10 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +# if _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_WIN32API) +_LIBCPP_EXPORTED_FROM_ABI void* __filebuf_windows_native_handle(FILE* __file) noexcept; +# endif + template <class _CharT, class _Traits> class _LIBCPP_TEMPLATE_VIS basic_filebuf : public basic_streambuf<_CharT, _Traits> { public: @@ -219,6 +230,15 @@ public: typedef typename traits_type::pos_type pos_type; typedef typename traits_type::off_type off_type; typedef typename traits_type::state_type state_type; +# if _LIBCPP_STD_VER >= 26 +# if defined(_LIBCPP_WIN32API) + using native_handle_type = void*; // HANDLE +# elif __has_include(<unistd.h>) + using native_handle_type = int; // POSIX file descriptor +# else +# error "Provide a native file handle!" +# endif +# endif // 27.9.1.2 Constructors/destructor: basic_filebuf(); @@ -245,6 +265,18 @@ public: # endif _LIBCPP_HIDE_FROM_ABI basic_filebuf* __open(int __fd, ios_base::openmode __mode); basic_filebuf* close(); +# if _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept { + _LIBCPP_ASSERT_UNCATEGORIZED(this->is_open(), "File must be opened"); +# if defined(_LIBCPP_WIN32API) + return std::__filebuf_windows_native_handle(__file_); +# elif __has_include(<unistd.h>) + return fileno(__file_); +# else +# error "Provide a way to determine the file native handle!" 
+# endif + } +# endif // _LIBCPP_STD_VER >= 26 _LIBCPP_HIDE_FROM_ABI inline static const char* __make_mdstring(ios_base::openmode __mode) _NOEXCEPT; @@ -1024,6 +1056,9 @@ public: typedef typename traits_type::int_type int_type; typedef typename traits_type::pos_type pos_type; typedef typename traits_type::off_type off_type; +# if _LIBCPP_STD_VER >= 26 + using native_handle_type = typename basic_filebuf<_CharT, _Traits>::native_handle_type; +# endif _LIBCPP_HIDE_FROM_ABI basic_ifstream(); _LIBCPP_HIDE_FROM_ABI explicit basic_ifstream(const char* __s, ios_base::openmode __mode = ios_base::in); @@ -1041,6 +1076,9 @@ public: _LIBCPP_HIDE_FROM_ABI void swap(basic_ifstream& __rhs); _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const; +# if _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept { return rdbuf()->native_handle(); } +# endif _LIBCPP_HIDE_FROM_ABI bool is_open() const; void open(const char* __s, ios_base::openmode __mode = ios_base::in); # ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR @@ -1171,6 +1209,9 @@ public: typedef typename traits_type::int_type int_type; typedef typename traits_type::pos_type pos_type; typedef typename traits_type::off_type off_type; +# if _LIBCPP_STD_VER >= 26 + using native_handle_type = typename basic_filebuf<_CharT, _Traits>::native_handle_type; +# endif _LIBCPP_HIDE_FROM_ABI basic_ofstream(); _LIBCPP_HIDE_FROM_ABI explicit basic_ofstream(const char* __s, ios_base::openmode __mode = ios_base::out); @@ -1190,6 +1231,9 @@ public: _LIBCPP_HIDE_FROM_ABI void swap(basic_ofstream& __rhs); _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const; +# if _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept { return rdbuf()->native_handle(); } +# endif _LIBCPP_HIDE_FROM_ABI bool is_open() const; void open(const char* __s, ios_base::openmode __mode = ios_base::out); # ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR @@ -1321,6 +1365,9 @@ public: typedef typename traits_type::int_type int_type; typedef typename traits_type::pos_type pos_type; typedef typename traits_type::off_type off_type; +# if _LIBCPP_STD_VER >= 26 + using native_handle_type = typename basic_filebuf<_CharT, _Traits>::native_handle_type; +# endif _LIBCPP_HIDE_FROM_ABI basic_fstream(); _LIBCPP_HIDE_FROM_ABI explicit basic_fstream(const char* __s, @@ -1345,6 +1392,9 @@ public: _LIBCPP_HIDE_FROM_ABI void swap(basic_fstream& __rhs); _LIBCPP_HIDE_FROM_ABI basic_filebuf<char_type, traits_type>* rdbuf() const; +# if _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() const noexcept { return rdbuf()->native_handle(); } +# endif _LIBCPP_HIDE_FROM_ABI bool is_open() const; _LIBCPP_HIDE_FROM_ABI void open(const char* __s, ios_base::openmode __mode = ios_base::in | ios_base::out); # ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR diff --git a/libcxx/include/print b/libcxx/include/print index 0f8e73f8eb5c..5e00fc87f47e 100644 --- a/libcxx/include/print +++ b/libcxx/include/print @@ -122,6 +122,8 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt&, char32_t) = delete; template <class _OutIt> requires __utf16_code_unit<iter_value_t<_OutIt>> _LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt& __out_it, char32_t __value) { + // [print.fun]/7 : "if `out` contains invalid code units, the behavior is undefined and implementations are encouraged + // to diagnose it". 
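The <fstream> hunks above add the C++26 native_handle_type and native_handle() members, exposing a POSIX file descriptor or a Windows HANDLE that the stream keeps owning. A hedged usage sketch, assuming a standard library that already ships P1759 and a POSIX system:

// Sketch only: requires a C++26 library providing native_handle() (P1759) and
// a POSIX platform where the handle is a plain file descriptor.
#include <fstream>
#include <iostream>

#if defined(__unix__) || defined(__APPLE__)
#  include <sys/stat.h>   // fstat
#endif

int main() {
  std::ofstream out("example.txt");
  out << "hello\n";
  out.flush();

#if defined(__cpp_lib_fstream_native_handle) && (defined(__unix__) || defined(__APPLE__))
  // The descriptor stays owned by the stream; it is only borrowed for fstat().
  struct stat st{};
  if (::fstat(out.native_handle(), &st) == 0)
    std::cout << "size so far: " << st.st_size << " bytes\n";
#endif
}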
_LIBCPP_ASSERT_UNCATEGORIZED(__is_scalar_value(__value), "an invalid unicode scalar value results in invalid UTF-16"); if (__value < 0x10000) { @@ -137,6 +139,8 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt& __out_it, char32_t __value template <class _OutIt> requires __utf32_code_unit<iter_value_t<_OutIt>> _LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt& __out_it, char32_t __value) { + // [print.fun]/7 : "if `out` contains invalid code units, the behavior is undefined and implementations are encouraged + // to diagnose it". _LIBCPP_ASSERT_UNCATEGORIZED(__is_scalar_value(__value), "an invalid unicode scalar value results in invalid UTF-32"); *__out_it++ = __value; } @@ -214,7 +218,7 @@ _LIBCPP_HIDE_FROM_ABI inline bool __is_terminal(FILE* __stream) { template <class = void> // TODO PRINT template or availability markup fires too eagerly (http://llvm.org/PR61563). _LIBCPP_HIDE_FROM_ABI inline void __vprint_nonunicode(FILE* __stream, string_view __fmt, format_args __args, bool __write_nl) { - _LIBCPP_ASSERT_UNCATEGORIZED(__stream, "__stream is a valid pointer to an output C stream"); + _LIBCPP_ASSERT_NON_NULL(__stream, "__stream must be a valid pointer to an output C stream"); string __str = std::vformat(__fmt, __args); if (__write_nl) __str.push_back('\n'); @@ -290,7 +294,7 @@ __vprint_unicode([[maybe_unused]] FILE* __stream, [[maybe_unused]] string_view __fmt, [[maybe_unused]] format_args __args, [[maybe_unused]] bool __write_nl) { - _LIBCPP_ASSERT_UNCATEGORIZED(__stream, "__stream is a valid pointer to an output C stream"); + _LIBCPP_ASSERT_NON_NULL(__stream, "__stream must be a valid pointer to an output C stream"); // [print.fun] // 7 - Effects: If stream refers to a terminal capable of displaying diff --git a/libcxx/include/regex b/libcxx/include/regex index 061194cb2eba..b575a267583b 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -4587,28 +4587,36 @@ public: // element access: _LIBCPP_HIDE_FROM_ABI difference_type length(size_type __sub = 0) const { - _LIBCPP_ASSERT_UNCATEGORIZED(ready(), "match_results::length() called when not ready"); + // If the match results are not ready, this will return `0`. + _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::length() called when not ready"); return (*this)[__sub].length(); } _LIBCPP_HIDE_FROM_ABI difference_type position(size_type __sub = 0) const { - _LIBCPP_ASSERT_UNCATEGORIZED(ready(), "match_results::position() called when not ready"); + // If the match results are not ready, this will return the result of subtracting two default-constructed iterators + // (which is typically a well-defined operation). + _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::position() called when not ready"); return std::distance(__position_start_, (*this)[__sub].first); } _LIBCPP_HIDE_FROM_ABI string_type str(size_type __sub = 0) const { - _LIBCPP_ASSERT_UNCATEGORIZED(ready(), "match_results::str() called when not ready"); + // If the match results are not ready, this will return an empty string. + _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::str() called when not ready"); return (*this)[__sub].str(); } _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __n) const { - _LIBCPP_ASSERT_UNCATEGORIZED(ready(), "match_results::operator[]() called when not ready"); + // If the match results are not ready, this call will be equivalent to calling this function with `__n >= size()`, + // returning an empty subrange. 
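The match_results hunks in this <regex> diff downgrade the ready() checks to pedantic because an unready object degenerates into empty results rather than an invalid access. A small example of the two states involved (standard <regex>):

#include <cassert>
#include <regex>
#include <string>

int main() {
  std::smatch m;
  // A default-constructed match_results object is not ready yet; calling
  // str()/prefix()/suffix() on it is the precondition violation the pedantic
  // assertions above diagnose, so it is not done here.
  assert(!m.ready());

  std::string s = "alpha 42 beta";
  if (std::regex_search(s, m, std::regex("[0-9]+"))) {
    assert(m.ready());
    assert(m.str() == "42");
    assert(m.prefix().str() == "alpha ");
    assert(m.suffix().str() == " beta");
  }
}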
+ _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::operator[]() called when not ready"); return __n < __matches_.size() ? __matches_[__n] : __unmatched_; } _LIBCPP_HIDE_FROM_ABI const_reference prefix() const { - _LIBCPP_ASSERT_UNCATEGORIZED(ready(), "match_results::prefix() called when not ready"); + // If the match results are not ready, this will return a default-constructed empty `__suffix_`. + _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::prefix() called when not ready"); return __prefix_; } _LIBCPP_HIDE_FROM_ABI const_reference suffix() const { - _LIBCPP_ASSERT_UNCATEGORIZED(ready(), "match_results::suffix() called when not ready"); + // If the match results are not ready, this will return a default-constructed empty `__suffix_`. + _LIBCPP_ASSERT_PEDANTIC(ready(), "match_results::suffix() called when not ready"); return __suffix_; } @@ -4722,7 +4730,8 @@ _OutputIter match_results<_BidirectionalIterator, _Allocator>::format( const char_type* __fmt_first, const char_type* __fmt_last, regex_constants::match_flag_type __flags) const { - _LIBCPP_ASSERT_UNCATEGORIZED(ready(), "match_results::format() called when not ready"); + // Note: this duplicates a check in `vector::operator[]` but provides a better error message. + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(ready(), "match_results::format() called when not ready"); if (__flags & regex_constants::format_sed) { for (; __fmt_first != __fmt_last; ++__fmt_first) { if (*__fmt_first == '&') diff --git a/libcxx/include/set b/libcxx/include/set index 08677a94054f..55ba8f8208be 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -769,13 +769,13 @@ public: #if _LIBCPP_STD_VER >= 17 _LIBCPP_HIDE_FROM_ABI insert_return_type insert(node_type&& __nh) { - _LIBCPP_ASSERT_UNCATEGORIZED(__nh.empty() || __nh.get_allocator() == get_allocator(), - "node_type with incompatible allocator passed to set::insert()"); + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to set::insert()"); return __tree_.template __node_handle_insert_unique< node_type, insert_return_type>(std::move(__nh)); } _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, node_type&& __nh) { - _LIBCPP_ASSERT_UNCATEGORIZED(__nh.empty() || __nh.get_allocator() == get_allocator(), - "node_type with incompatible allocator passed to set::insert()"); + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to set::insert()"); return __tree_.template __node_handle_insert_unique<node_type>(__hint, std::move(__nh)); } _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) { @@ -786,25 +786,25 @@ public: } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(set<key_type, _Compare2, allocator_type>& __source) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_unique(__source.__tree_); } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(set<key_type, _Compare2, allocator_type>&& __source) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_unique(__source.__tree_); } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(multiset<key_type, _Compare2, allocator_type>& __source) { - 
_LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_unique(__source.__tree_); } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(multiset<key_type, _Compare2, allocator_type>&& __source) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_unique(__source.__tree_); } @@ -1227,13 +1227,13 @@ public: #if _LIBCPP_STD_VER >= 17 _LIBCPP_HIDE_FROM_ABI iterator insert(node_type&& __nh) { - _LIBCPP_ASSERT_UNCATEGORIZED(__nh.empty() || __nh.get_allocator() == get_allocator(), - "node_type with incompatible allocator passed to multiset::insert()"); + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to multiset::insert()"); return __tree_.template __node_handle_insert_multi<node_type>(std::move(__nh)); } _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, node_type&& __nh) { - _LIBCPP_ASSERT_UNCATEGORIZED(__nh.empty() || __nh.get_allocator() == get_allocator(), - "node_type with incompatible allocator passed to multiset::insert()"); + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(__nh.empty() || __nh.get_allocator() == get_allocator(), + "node_type with incompatible allocator passed to multiset::insert()"); return __tree_.template __node_handle_insert_multi<node_type>(__hint, std::move(__nh)); } _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) { @@ -1244,25 +1244,25 @@ public: } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(multiset<key_type, _Compare2, allocator_type>& __source) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_multi(__source.__tree_); } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(multiset<key_type, _Compare2, allocator_type>&& __source) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_multi(__source.__tree_); } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(set<key_type, _Compare2, allocator_type>& __source) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_multi(__source.__tree_); } template <class _Compare2> _LIBCPP_HIDE_FROM_ABI void merge(set<key_type, _Compare2, allocator_type>&& __source) { - _LIBCPP_ASSERT_UNCATEGORIZED( + _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_multi(__source.__tree_); } diff --git a/libcxx/include/span b/libcxx/include/span index 7dd53110ac29..007a32597f96 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -92,6 +92,7 @@ public: // [span.elem], span element access constexpr reference operator[](size_type idx) const; + constexpr reference at(size_type idx) const; // since C++26 constexpr reference front() const; constexpr reference back() const; constexpr pointer data() const noexcept; @@ -146,6 +147,7 @@ template<class R> #include <__utility/forward.h> #include 
<array> // for array #include <cstddef> // for byte +#include <stdexcept> #include <version> // standard-mandated includes @@ -321,6 +323,14 @@ public: return __data_[__idx]; } +# if _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const { + if (__index >= size()) + std::__throw_out_of_range("span"); + return __data_[__index]; + } +# endif + _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span<T, N>::front() on empty span"); return __data_[0]; @@ -469,6 +479,14 @@ public: return __data_[__idx]; } +# if _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI constexpr reference at(size_type __index) const { + if (__index >= size()) + std::__throw_out_of_range("span"); + return __data_[__index]; + } +# endif + _LIBCPP_HIDE_FROM_ABI constexpr reference front() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "span<T>::front() on empty span"); return __data_[0]; diff --git a/libcxx/include/sstream b/libcxx/include/sstream index bd5cea9a5e94..9f75b7e0ac9e 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -398,9 +398,9 @@ public: typename string_type::size_type __pos = __view.empty() ? 0 : __view.data() - __str_.data(); // In C++23, this is just string_type(std::move(__str_), __pos, __view.size(), __str_.get_allocator()); // But we need something that works in C++20 also. - string_type __result(__str_.get_allocator()); - __result.__move_assign(std::move(__str_), __pos, __view.size()); - __str_.clear(); + string_type __result(std::move(__str_), __str_.get_allocator()); + __result.resize(__pos + __view.size()); + __result.erase(0, __pos); __init_buf_ptrs(); return __result; } diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 8c954fc72c37..aa22c320b1ec 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -1416,25 +1416,27 @@ inline constexpr size_t tuple_size_v = tuple_size<_Tp>::value; # define _LIBCPP_NOEXCEPT_RETURN(...) \ noexcept(noexcept(__VA_ARGS__)) { return __VA_ARGS__; } +// The _LIBCPP_NOEXCEPT_RETURN macro breaks formatting. +// clang-format off template <class _Fn, class _Tuple, size_t... _Id> inline _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) __apply_tuple_impl(_Fn&& __f, _Tuple&& __t, __tuple_indices<_Id...>) _LIBCPP_NOEXCEPT_RETURN(std::__invoke(std::forward<_Fn>(__f), std::get<_Id>(std::forward<_Tuple>(__t))...)) - template <class _Fn, class _Tuple> - inline _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) apply(_Fn&& __f, _Tuple&& __t) _LIBCPP_NOEXCEPT_RETURN( - std::__apply_tuple_impl(std::forward<_Fn>(__f), - std::forward<_Tuple>(__t), - typename __make_tuple_indices<tuple_size_v<remove_reference_t<_Tuple>>>::type{})) +template <class _Fn, class _Tuple> +inline _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) apply(_Fn&& __f, _Tuple&& __t) + _LIBCPP_NOEXCEPT_RETURN(std::__apply_tuple_impl( + std::forward<_Fn>(__f), + std::forward<_Tuple>(__t), + typename __make_tuple_indices<tuple_size_v<remove_reference_t<_Tuple>>>::type{})) - template <class _Tp, class _Tuple, size_t... _Idx> - inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp - __make_from_tuple_impl(_Tuple&& __t, __tuple_indices<_Idx...>) - _LIBCPP_NOEXCEPT_RETURN(_Tp(std::get<_Idx>(std::forward<_Tuple>(__t))...)) +template <class _Tp, class _Tuple, size_t... 
_Idx> +inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp __make_from_tuple_impl(_Tuple&& __t, __tuple_indices<_Idx...>) + _LIBCPP_NOEXCEPT_RETURN(_Tp(std::get<_Idx>(std::forward<_Tuple>(__t))...)) - template <class _Tp, class _Tuple> - inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp - make_from_tuple(_Tuple&& __t) _LIBCPP_NOEXCEPT_RETURN(std::__make_from_tuple_impl<_Tp>( +template <class _Tp, class _Tuple> +inline _LIBCPP_HIDE_FROM_ABI constexpr _Tp make_from_tuple(_Tuple&& __t) + _LIBCPP_NOEXCEPT_RETURN(std::__make_from_tuple_impl<_Tp>( std::forward<_Tuple>(__t), typename __make_tuple_indices<tuple_size_v<remove_reference_t<_Tuple>>>::type{})) # undef _LIBCPP_NOEXCEPT_RETURN @@ -1443,9 +1445,11 @@ __apply_tuple_impl(_Fn&& __f, _Tuple&& __t, __tuple_indices<_Id...>) #endif // !defined(_LIBCPP_CXX03_LANG) - _LIBCPP_END_NAMESPACE_STD +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS - _LIBCPP_POP_MACROS +// clang-format on #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include <exception> diff --git a/libcxx/include/version b/libcxx/include/version index 768710ef5c84..c96647894dce 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -201,6 +201,7 @@ __cpp_lib_smart_ptr_for_overwrite 202002L <memory> __cpp_lib_smart_ptr_owner_equality 202306L <memory> __cpp_lib_source_location 201907L <source_location> __cpp_lib_span 202002L <span> +__cpp_lib_span_at 202311L <span> __cpp_lib_span_initializer_list 202311L <span> __cpp_lib_spanstream 202106L <spanstream> __cpp_lib_ssize 201902L <iterator> @@ -495,7 +496,7 @@ __cpp_lib_within_lifetime 202306L <type_traits> // # define __cpp_lib_freestanding_optional 202311L // # define __cpp_lib_freestanding_string_view 202311L // # define __cpp_lib_freestanding_variant 202311L -// # define __cpp_lib_fstream_native_handle 202306L +# define __cpp_lib_fstream_native_handle 202306L // # define __cpp_lib_function_ref 202306L // # define __cpp_lib_hazard_pointer 202306L // # define __cpp_lib_linalg 202311L @@ -505,6 +506,7 @@ __cpp_lib_within_lifetime 202306L <type_traits> // # define __cpp_lib_rcu 202306L // # define __cpp_lib_saturation_arithmetic 202311L // # define __cpp_lib_smart_ptr_owner_equality 202306L +# define __cpp_lib_span_at 202311L // # define __cpp_lib_span_initializer_list 202311L // # define __cpp_lib_sstream_from_string_view 202306L // # define __cpp_lib_submdspan 202306L diff --git a/libcxx/src/filesystem/error.h b/libcxx/src/filesystem/error.h index b86f4ed41071..572cc73292a1 100644 --- a/libcxx/src/filesystem/error.h +++ b/libcxx/src/filesystem/error.h @@ -99,7 +99,7 @@ inline errc __win_err_to_errc(int err) { #endif // _LIBCPP_WIN32API inline error_code capture_errno() { - _LIBCPP_ASSERT_UNCATEGORIZED(errno != 0, "Expected errno to be non-zero"); + _LIBCPP_ASSERT_INTERNAL(errno != 0, "Expected errno to be non-zero"); return error_code(errno, generic_category()); } diff --git a/libcxx/src/filesystem/format_string.h b/libcxx/src/filesystem/format_string.h index 215d42421b2a..a44def86f53e 100644 --- a/libcxx/src/filesystem/format_string.h +++ b/libcxx/src/filesystem/format_string.h @@ -47,7 +47,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 0) string vformat_string(const ch size_t size_with_null = static_cast<size_t>(ret) + 1; result.__resize_default_init(size_with_null - 1); ret = ::vsnprintf(&result[0], size_with_null, msg, ap); - _LIBCPP_ASSERT_UNCATEGORIZED(static_cast<size_t>(ret) == (size_with_null - 1), "TODO"); + _LIBCPP_ASSERT_INTERNAL(static_cast<size_t>(ret) == (size_with_null - 1), "TODO"); } return 
result; } diff --git a/libcxx/src/filesystem/posix_compat.h b/libcxx/src/filesystem/posix_compat.h index ec2de49960be..760cdb65dae1 100644 --- a/libcxx/src/filesystem/posix_compat.h +++ b/libcxx/src/filesystem/posix_compat.h @@ -318,8 +318,8 @@ inline int statvfs(const wchar_t* p, StatVFS* buf) { inline wchar_t* getcwd([[maybe_unused]] wchar_t* in_buf, [[maybe_unused]] size_t in_size) { // Only expected to be used with us allocating the buffer. - _LIBCPP_ASSERT_UNCATEGORIZED(in_buf == nullptr, "Windows getcwd() assumes in_buf==nullptr"); - _LIBCPP_ASSERT_UNCATEGORIZED(in_size == 0, "Windows getcwd() assumes in_size==0"); + _LIBCPP_ASSERT_INTERNAL(in_buf == nullptr, "Windows getcwd() assumes in_buf==nullptr"); + _LIBCPP_ASSERT_INTERNAL(in_size == 0, "Windows getcwd() assumes in_size==0"); size_t buff_size = MAX_PATH + 10; std::unique_ptr<wchar_t, decltype(&::free)> buff(static_cast<wchar_t*>(malloc(buff_size * sizeof(wchar_t))), &::free); @@ -338,7 +338,7 @@ inline wchar_t* getcwd([[maybe_unused]] wchar_t* in_buf, [[maybe_unused]] size_t inline wchar_t* realpath(const wchar_t* path, [[maybe_unused]] wchar_t* resolved_name) { // Only expected to be used with us allocating the buffer. - _LIBCPP_ASSERT_UNCATEGORIZED(resolved_name == nullptr, "Windows realpath() assumes a null resolved_name"); + _LIBCPP_ASSERT_INTERNAL(resolved_name == nullptr, "Windows realpath() assumes a null resolved_name"); WinHandle h(path, FILE_READ_ATTRIBUTES, 0); if (!h) { diff --git a/libcxx/src/fstream.cpp b/libcxx/src/fstream.cpp new file mode 100644 index 000000000000..55a4442b9c78 --- /dev/null +++ b/libcxx/src/fstream.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <__config> +#include <cstdio> +#include <fstream> + +#if defined(_LIBCPP_WIN32API) +# define WIN32_LEAN_AND_MEAN +# define NOMINMAX +# include <io.h> +# include <windows.h> +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if defined(_LIBCPP_WIN32API) + +// Confirm that `HANDLE` is `void*` as implemented in `basic_filebuf` +static_assert(std::same_as<HANDLE, void*>); + +_LIBCPP_EXPORTED_FROM_ABI void* __filebuf_windows_native_handle(FILE* __file) noexcept { + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/get-osfhandle?view=msvc-170 + intptr_t __handle = _get_osfhandle(fileno(__file)); + if (__handle == -1) + return nullptr; + return reinterpret_cast<void*>(__handle); +} + +#endif + +_LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/src/include/to_chars_floating_point.h b/libcxx/src/include/to_chars_floating_point.h index 3110bc20e160..e4715d10d97d 100644 --- a/libcxx/src/include/to_chars_floating_point.h +++ b/libcxx/src/include/to_chars_floating_point.h @@ -269,7 +269,7 @@ to_chars_result _Floating_to_chars_hex_precision( // * Print the leading hexit, then mask it away. 
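The step named in the comment above, and the loop that follows it, walk the adjusted mantissa four bits at a time from the most significant nibble down. A standalone sketch of the same walk on a plain IEEE-754 double (illustrative only, not the <charconv> implementation):

#include <bit>
#include <cstdint>
#include <cstdio>

// Print the 13 explicit mantissa hexits of a double, high nibble first.
void print_mantissa_hexits(double d) {
  const uint64_t bits = std::bit_cast<uint64_t>(d);
  const uint64_t mantissa = bits & ((uint64_t{1} << 52) - 1); // 52 explicit bits
  for (int shift = 48; shift >= 0; shift -= 4) {
    const unsigned nibble = static_cast<unsigned>((mantissa >> shift) & 0xF);
    std::putchar("0123456789abcdef"[nibble]);
  }
  std::putchar('\n');
}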
{ const uint32_t _Nibble = static_cast<uint32_t>(_Adjusted_mantissa >> _Adjusted_explicit_bits); - _LIBCPP_ASSERT_UNCATEGORIZED(_Nibble < 3, ""); + _LIBCPP_ASSERT_INTERNAL(_Nibble < 3, ""); const char _Leading_hexit = static_cast<char>('0' + _Nibble); *_First++ = _Leading_hexit; @@ -288,12 +288,12 @@ to_chars_result _Floating_to_chars_hex_precision( int32_t _Number_of_bits_remaining = _Adjusted_explicit_bits; // 24 for float, 52 for double for (;;) { - _LIBCPP_ASSERT_UNCATEGORIZED(_Number_of_bits_remaining >= 4, ""); - _LIBCPP_ASSERT_UNCATEGORIZED(_Number_of_bits_remaining % 4 == 0, ""); + _LIBCPP_ASSERT_INTERNAL(_Number_of_bits_remaining >= 4, ""); + _LIBCPP_ASSERT_INTERNAL(_Number_of_bits_remaining % 4 == 0, ""); _Number_of_bits_remaining -= 4; const uint32_t _Nibble = static_cast<uint32_t>(_Adjusted_mantissa >> _Number_of_bits_remaining); - _LIBCPP_ASSERT_UNCATEGORIZED(_Nibble < 16, ""); + _LIBCPP_ASSERT_INTERNAL(_Nibble < 16, ""); const char _Hexit = __itoa::_Charconv_digits[_Nibble]; *_First++ = _Hexit; @@ -415,12 +415,12 @@ to_chars_result _Floating_to_chars_hex_shortest( // '0' hexits, the same condition works (as we print the final hexit and mask it away); we don't need to test // _Number_of_bits_remaining. do { - _LIBCPP_ASSERT_UNCATEGORIZED(_Number_of_bits_remaining >= 4, ""); - _LIBCPP_ASSERT_UNCATEGORIZED(_Number_of_bits_remaining % 4 == 0, ""); + _LIBCPP_ASSERT_INTERNAL(_Number_of_bits_remaining >= 4, ""); + _LIBCPP_ASSERT_INTERNAL(_Number_of_bits_remaining % 4 == 0, ""); _Number_of_bits_remaining -= 4; const uint32_t _Nibble = static_cast<uint32_t>(_Adjusted_mantissa >> _Number_of_bits_remaining); - _LIBCPP_ASSERT_UNCATEGORIZED(_Nibble < 16, ""); + _LIBCPP_ASSERT_INTERNAL(_Nibble < 16, ""); const char _Hexit = __itoa::_Charconv_digits[_Nibble]; if (_First == _Last) { @@ -940,13 +940,13 @@ to_chars_result _Floating_to_chars_general_precision( _Effective_precision = std::min(_Precision - (_Scientific_exponent_X + 1), _Max_fixed_precision); const to_chars_result _Buf_result = _Floating_to_chars_fixed_precision(_Buffer, std::end(_Buffer), _Value, _Effective_precision); - _LIBCPP_ASSERT_UNCATEGORIZED(_Buf_result.ec == errc{}, ""); + _LIBCPP_ASSERT_INTERNAL(_Buf_result.ec == errc{}, ""); _Significand_last = _Buf_result.ptr; } else { _Effective_precision = std::min(_Precision - 1, _Max_scientific_precision); const to_chars_result _Buf_result = _Floating_to_chars_scientific_precision(_Buffer, std::end(_Buffer), _Value, _Effective_precision); - _LIBCPP_ASSERT_UNCATEGORIZED(_Buf_result.ec == errc{}, ""); + _LIBCPP_ASSERT_INTERNAL(_Buf_result.ec == errc{}, ""); _Significand_last = std::find(_Buffer, _Buf_result.ptr, 'e'); _Exponent_first = _Significand_last; _Exponent_last = _Buf_result.ptr; @@ -992,7 +992,7 @@ to_chars_result _Floating_to_chars( char* _First, char* const _Last, _Floating _Value, const chars_format _Fmt, const int _Precision) noexcept { if constexpr (_Overload == _Floating_to_chars_overload::_Plain) { - _LIBCPP_ASSERT_UNCATEGORIZED(_Fmt == chars_format{}, ""); // plain overload must pass chars_format{} internally + _LIBCPP_ASSERT_INTERNAL(_Fmt == chars_format{}, ""); // plain overload must pass chars_format{} internally } else { _LIBCPP_ASSERT_UNCATEGORIZED(_Fmt == chars_format::general || _Fmt == chars_format::scientific || _Fmt == chars_format::fixed || _Fmt == chars_format::hex, diff --git a/libcxx/src/memory_resource.cpp b/libcxx/src/memory_resource.cpp index afd1b892086d..42c366893f73 100644 --- a/libcxx/src/memory_resource.cpp +++ b/libcxx/src/memory_resource.cpp 
@@ -230,7 +230,7 @@ public: } void* __allocate_in_new_chunk(memory_resource* upstream, size_t block_size, size_t chunk_size) { - _LIBCPP_ASSERT_UNCATEGORIZED(chunk_size % block_size == 0, ""); + _LIBCPP_ASSERT_INTERNAL(chunk_size % block_size == 0, ""); static_assert(__default_alignment >= alignof(std::max_align_t), ""); static_assert(__default_alignment >= alignof(__chunk_footer), ""); static_assert(__default_alignment >= alignof(__vacancy_header), ""); diff --git a/libcxx/src/strstream.cpp b/libcxx/src/strstream.cpp index a9b5989ec495..70374191c6ab 100644 --- a/libcxx/src/strstream.cpp +++ b/libcxx/src/strstream.cpp @@ -120,7 +120,7 @@ strstreambuf::int_type strstreambuf::overflow(int_type __c) { if (buf == nullptr) return int_type(EOF); if (old_size != 0) { - _LIBCPP_ASSERT_UNCATEGORIZED(eback(), "overflow copying from NULL"); + _LIBCPP_ASSERT_INTERNAL(eback(), "strstreambuf::overflow reallocating but the get area is a null pointer"); memcpy(buf, eback(), static_cast<size_t>(old_size)); } ptrdiff_t ninp = gptr() - eback(); diff --git a/libcxx/src/system_error.cpp b/libcxx/src/system_error.cpp index 034b73c5480a..f518b480a278 100644 --- a/libcxx/src/system_error.cpp +++ b/libcxx/src/system_error.cpp @@ -68,7 +68,7 @@ __attribute__((unused)) const char* handle_strerror_r_return(int strerror_return if (new_errno == EINVAL) return ""; - _LIBCPP_ASSERT_UNCATEGORIZED(new_errno == ERANGE, "unexpected error from ::strerror_r"); + _LIBCPP_ASSERT_INTERNAL(new_errno == ERANGE, "unexpected error from ::strerror_r"); // FIXME maybe? 'strerror_buff_size' is likely to exceed the // maximum error size so ERANGE shouldn't be returned. std::abort(); diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 647a5a9c9d92..8517d328bd05 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -12,6 +12,8 @@ #define __UNWINDCURSOR_HPP__ #include "cet_unwind.h" +#include <errno.h> +#include <signal.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -990,6 +992,7 @@ private: R dummy; return stepThroughSigReturn(dummy); } + bool isReadableAddr(const pint_t addr) const; #if defined(_LIBUNWIND_TARGET_AARCH64) bool setInfoForSigReturn(Registers_arm64 &); int stepThroughSigReturn(Registers_arm64 &); @@ -2700,20 +2703,12 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_arm64 &) { // [1] https://github.com/torvalds/linux/blob/master/arch/arm64/kernel/vdso/sigreturn.S const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP)); // The PC might contain an invalid address if the unwind info is bad, so - // directly accessing it could cause a segfault. Use process_vm_readv to read - // the memory safely instead. process_vm_readv was added in Linux 3.2, and - // AArch64 supported was added in Linux 3.7, so the syscall is guaranteed to - // be present. Unfortunately, there are Linux AArch64 environments where the - // libc wrapper for the syscall might not be present (e.g. Android 5), so call - // the syscall directly instead. - uint32_t instructions[2]; - struct iovec local_iov = {&instructions, sizeof instructions}; - struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof instructions}; - long bytesRead = - syscall(SYS_process_vm_readv, getpid(), &local_iov, 1, &remote_iov, 1, 0); + // directly accessing it could cause a SIGSEGV. 
+ if (!isReadableAddr(pc)) + return false; + auto *instructions = reinterpret_cast<const uint32_t *>(pc); // Look for instructions: mov x8, #0x8b; svc #0x0 - if (bytesRead != sizeof instructions || instructions[0] != 0xd2801168 || - instructions[1] != 0xd4000001) + if (instructions[0] != 0xd2801168 || instructions[1] != 0xd4000001) return false; _info = {}; @@ -2762,18 +2757,17 @@ int UnwindCursor<A, R>::stepThroughSigReturn(Registers_arm64 &) { template <typename A, typename R> bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_riscv &) { const pint_t pc = static_cast<pint_t>(getReg(UNW_REG_IP)); - uint32_t instructions[2]; - struct iovec local_iov = {&instructions, sizeof instructions}; - struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof instructions}; - long bytesRead = - syscall(SYS_process_vm_readv, getpid(), &local_iov, 1, &remote_iov, 1, 0); + // The PC might contain an invalid address if the unwind info is bad, so + // directly accessing it could cause a SIGSEGV. + if (!isReadableAddr(pc)) + return false; + const auto *instructions = reinterpret_cast<const uint32_t *>(pc); // Look for the two instructions used in the sigreturn trampoline // __vdso_rt_sigreturn: // // 0x08b00893 li a7,0x8b // 0x00000073 ecall - if (bytesRead != sizeof instructions || instructions[0] != 0x08b00893 || - instructions[1] != 0x00000073) + if (instructions[0] != 0x08b00893 || instructions[1] != 0x00000073) return false; _info = {}; @@ -2822,13 +2816,11 @@ bool UnwindCursor<A, R>::setInfoForSigReturn(Registers_s390x &) { // onto the stack. const pint_t pc = static_cast<pint_t>(this->getReg(UNW_REG_IP)); // The PC might contain an invalid address if the unwind info is bad, so - // directly accessing it could cause a segfault. Use process_vm_readv to - // read the memory safely instead. - uint16_t inst; - struct iovec local_iov = {&inst, sizeof inst}; - struct iovec remote_iov = {reinterpret_cast<void *>(pc), sizeof inst}; - long bytesRead = process_vm_readv(getpid(), &local_iov, 1, &remote_iov, 1, 0); - if (bytesRead == sizeof inst && (inst == 0x0a77 || inst == 0x0aad)) { + // directly accessing it could cause a SIGSEGV. + if (!isReadableAddr(pc)) + return false; + const auto inst = *reinterpret_cast<const uint16_t *>(pc); + if (inst == 0x0a77 || inst == 0x0aad) { _info = {}; _info.start_ip = pc; _info.end_ip = pc + 2; @@ -2974,6 +2966,37 @@ bool UnwindCursor<A, R>::getFunctionName(char *buf, size_t bufLen, buf, bufLen, offset); } +#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN) +template <typename A, typename R> +bool UnwindCursor<A, R>::isReadableAddr(const pint_t addr) const { + // We use SYS_rt_sigprocmask, inspired by Abseil's AddressIsReadable. + + const auto sigsetAddr = reinterpret_cast<sigset_t *>(addr); + // We have to check that addr is nullptr because sigprocmask allows that + // as an argument without failure. + if (!sigsetAddr) + return false; + const auto saveErrno = errno; + // We MUST use a raw syscall here, as wrappers may try to access + // sigsetAddr which may cause a SIGSEGV. A raw syscall however is + // safe. Additionally, we need to pass the kernel_sigset_size, which is + // different from libc sizeof(sigset_t). For the majority of architectures, + // it's 64 bits (_NSIG), and libc NSIG is _NSIG + 1. 
+ const auto kernelSigsetSize = NSIG / 8; + [[maybe_unused]] const int Result = syscall( + SYS_rt_sigprocmask, /*how=*/~0, sigsetAddr, nullptr, kernelSigsetSize); + // Because our "how" is invalid, this syscall should always fail, and our + // errno should always be EINVAL or an EFAULT. This relies on the Linux + // kernel to check copy_from_user before checking if the "how" argument is + // invalid. + assert(Result == -1); + assert(errno == EFAULT || errno == EINVAL); + const auto readable = errno != EFAULT; + errno = saveErrno; + return readable; +} +#endif + #if defined(_LIBUNWIND_USE_CET) extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) { AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 48c48cefe91d..018f03b211e4 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -186,9 +186,9 @@ struct Configuration { // Used for /opt:lldltopartitions=N unsigned ltoPartitions = 1; - // Used for /opt:lldltocache=path + // Used for /lldltocache=path StringRef ltoCache; - // Used for /opt:lldltocachepolicy=policy + // Used for /lldltocachepolicy=policy llvm::CachePruningPolicy ltoCachePolicy; // Used for /opt:[no]ltodebugpassmanager diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 048f0ec30ebd..54b0a84e5213 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -1025,8 +1025,7 @@ addTaggedSymbolReferences(InputSectionBase &sec, // symbols should also be built with tagging. But, to handle these cases, we // demote the symbol to be untagged. void lld::elf::createTaggedSymbols(const SmallVector<ELFFileBase *, 0> &files) { - assert(config->emachine == EM_AARCH64 && - config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE); + assert(hasMemtag()); // First, collect all symbols that are marked as tagged, and count how many // times they're marked as tagged. diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 898e3e45b9e7..1d3d179e5d6f 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -43,6 +43,7 @@ public: const uint8_t *loc) const override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; + void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; bool relaxOnce(int pass) const override; }; @@ -307,6 +308,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_RELAX: return config->relax ? 
R_RELAX_HINT : R_NONE; case R_RISCV_SET_ULEB128: + case R_RISCV_SUB_ULEB128: return R_RISCV_LEB128; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + @@ -515,6 +517,46 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } } +void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { + uint64_t secAddr = sec.getOutputSection()->addr; + if (auto *s = dyn_cast<InputSection>(&sec)) + secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast<EhInputSection>(&sec)) + secAddr += ehIn->getParent()->outSecOff; + for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) { + const Relocation &rel = sec.relocs()[i]; + uint8_t *loc = buf + rel.offset; + const uint64_t val = + sec.getRelocTargetVA(sec.file, rel.type, rel.addend, + secAddr + rel.offset, *rel.sym, rel.expr); + + switch (rel.expr) { + case R_RELAX_HINT: + break; + case R_RISCV_LEB128: + if (i + 1 < size) { + const Relocation &rel1 = sec.relocs()[i + 1]; + if (rel.type == R_RISCV_SET_ULEB128 && + rel1.type == R_RISCV_SUB_ULEB128 && rel.offset == rel1.offset) { + auto val = rel.sym->getVA(rel.addend) - rel1.sym->getVA(rel1.addend); + if (overwriteULEB128(loc, val) >= 0x80) + errorOrWarn(sec.getLocation(rel.offset) + ": ULEB128 value " + + Twine(val) + " exceeds available space; references '" + + lld::toString(*rel.sym) + "'"); + ++i; + continue; + } + } + errorOrWarn(sec.getLocation(rel.offset) + + ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128"); + return; + default: + relocate(loc, rel, val); + break; + } + } +} + namespace { struct SymbolAnchor { uint64_t offset; diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 2135ac234864..c28e01e48195 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -358,6 +358,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, case R_X86_64_DTPOFF64: return R_DTPREL; case R_X86_64_TPOFF32: + case R_X86_64_TPOFF64: return R_TPREL; case R_X86_64_TLSDESC_CALL: return R_TLSDESC_CALL; @@ -791,6 +792,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { write32le(loc, val); break; case R_X86_64_64: + case R_X86_64_TPOFF64: case R_X86_64_DTPOFF64: case R_X86_64_PC64: case R_X86_64_SIZE64: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 5dfb57fda432..53b496bd0842 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -671,6 +671,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_RELAX_TLS_LD_TO_LE_ABS: case R_RELAX_GOT_PC_NOPIC: case R_RISCV_ADD: + case R_RISCV_LEB128: return sym.getVA(a); case R_ADDEND: return a; @@ -875,16 +876,6 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, } } -// Overwrite a ULEB128 value and keep the original length. -static uint64_t overwriteULEB128(uint8_t *bufLoc, uint64_t val) { - while (*bufLoc & 0x80) { - *bufLoc++ = 0x80 | (val & 0x7f); - val >>= 7; - } - *bufLoc = val; - return val; -} - // This function applies relocations to sections without SHF_ALLOC bit. // Such sections are never mapped to memory at runtime. Debug sections are // an example. 
Relocations in non-alloc sections are much easier to diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 28ae4b854306..03aec187668a 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -57,6 +57,10 @@ static StringRef getOutputSectionName(const InputSectionBase *s) { if (auto *isec = dyn_cast<InputSection>(s)) { if (InputSectionBase *rel = isec->getRelocatedSection()) { OutputSection *out = rel->getOutputSection(); + if (!out) { + assert(config->relocatable && (rel->flags & SHF_LINK_ORDER)); + return s->name; + } if (s->type == SHT_RELA) return saver().save(".rela" + out->name); return saver().save(".rel" + out->name); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 210b4d1eb1a7..20eb02b87984 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -988,8 +988,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, if (!config->isPic) return true; - // The size of a non preemptible symbol is a constant. - if (e == R_SIZE) + // Constant when referencing a non-preemptible symbol. + if (e == R_SIZE || e == R_RISCV_LEB128) return true; // For the target and the relocation, we want to know if they are @@ -1669,7 +1669,7 @@ void elf::postScanRelocations() { return; if (sym.isTagged() && sym.isDefined()) - mainPart->memtagDescriptors->addSymbol(sym); + mainPart->memtagGlobalDescriptors->addSymbol(sym); if (!sym.needsDynReloc()) return; diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 55b10f0c59b5..4fdb8c7075a6 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -531,13 +531,17 @@ void ScriptParser::readSearchDir() { // linker's sections sanity check failures. // https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() { - // VA and LMA expressions are optional, though for simplicity of - // implementation we assume they are not. That is what OVERLAY was designed - // for first of all: to allow sections with overlapping VAs at different LMAs. - Expr addrExpr = readExpr(); - expect(":"); - expect("AT"); - Expr lmaExpr = readParenExpr(); + Expr addrExpr; + if (consume(":")) { + addrExpr = [] { return script->getDot(); }; + } else { + addrExpr = readExpr(); + expect(":"); + } + // When AT is omitted, LMA should equal VMA. script->getDot() when evaluating + // lmaExpr will ensure this, even if the start address is specified. + Expr lmaExpr = + consume("AT") ? readParenExpr() : [] { return script->getDot(); }; expect("{"); SmallVector<SectionCommand *, 0> v; @@ -547,10 +551,15 @@ SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() { // starting from the base load address specified. OutputDesc *osd = readOverlaySectionDescription(); osd->osec.addrExpr = addrExpr; - if (prev) + if (prev) { osd->osec.lmaExpr = [=] { return prev->getLMA() + prev->size; }; - else + } else { osd->osec.lmaExpr = lmaExpr; + // Use first section address for subsequent sections as initial addrExpr + // can be DOT. Ensure the first section, even if empty, is not discarded. 
+ osd->osec.usedInExpression = true; + addrExpr = [=]() -> ExprValue { return {&osd->osec, false, 0, ""}; }; + } v.push_back(osd); prev = &osd->osec; } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 2b32eb3a0fe3..1c1b0ee2f9c8 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1450,13 +1450,14 @@ DynamicSection<ELFT>::computeContents() { if (config->zPacPlt) addInt(DT_AARCH64_PAC_PLT, 0); - if (config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE) { + if (hasMemtag()) { addInt(DT_AARCH64_MEMTAG_MODE, config->androidMemtagMode == NT_MEMTAG_LEVEL_ASYNC); addInt(DT_AARCH64_MEMTAG_HEAP, config->androidMemtagHeap); addInt(DT_AARCH64_MEMTAG_STACK, config->androidMemtagStack); - if (mainPart->memtagDescriptors->isNeeded()) { - addInSec(DT_AARCH64_MEMTAG_GLOBALS, *mainPart->memtagDescriptors); - addInt(DT_AARCH64_MEMTAG_GLOBALSSZ, mainPart->memtagDescriptors->getSize()); + if (mainPart->memtagGlobalDescriptors->isNeeded()) { + addInSec(DT_AARCH64_MEMTAG_GLOBALS, *mainPart->memtagGlobalDescriptors); + addInt(DT_AARCH64_MEMTAG_GLOBALSSZ, + mainPart->memtagGlobalDescriptors->getSize()); } } } @@ -3919,8 +3920,9 @@ static size_t computeOrWriteULEB128(uint64_t v, uint8_t *buf, size_t offset) { // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#83encoding-of-sht_aarch64_memtag_globals_dynamic constexpr uint64_t kMemtagStepSizeBits = 3; constexpr uint64_t kMemtagGranuleSize = 16; -static size_t createMemtagDescriptors(const SmallVector<const Symbol *, 0> &symbols, - uint8_t *buf = nullptr) { +static size_t +createMemtagGlobalDescriptors(const SmallVector<const Symbol *, 0> &symbols, + uint8_t *buf = nullptr) { size_t sectionSize = 0; uint64_t lastGlobalEnd = 0; @@ -3961,7 +3963,7 @@ static size_t createMemtagDescriptors(const SmallVector<const Symbol *, 0> &symb return sectionSize; } -bool MemtagDescriptors::updateAllocSize() { +bool MemtagGlobalDescriptors::updateAllocSize() { size_t oldSize = getSize(); std::stable_sort(symbols.begin(), symbols.end(), [](const Symbol *s1, const Symbol *s2) { @@ -3970,12 +3972,12 @@ bool MemtagDescriptors::updateAllocSize() { return oldSize != getSize(); } -void MemtagDescriptors::writeTo(uint8_t *buf) { - createMemtagDescriptors(symbols, buf); +void MemtagGlobalDescriptors::writeTo(uint8_t *buf) { + createMemtagGlobalDescriptors(symbols, buf); } -size_t MemtagDescriptors::getSize() const { - return createMemtagDescriptors(symbols); +size_t MemtagGlobalDescriptors::getSize() const { + return createMemtagGlobalDescriptors(symbols); } InStruct elf::in; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 3a9f4ba886f6..7882ad87c241 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1257,9 +1257,9 @@ public: size_t getSize() const override; }; -class MemtagDescriptors final : public SyntheticSection { +class MemtagGlobalDescriptors final : public SyntheticSection { public: - MemtagDescriptors() + MemtagGlobalDescriptors() : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC, /*alignment=*/4, ".memtag.globals.dynamic") {} @@ -1315,7 +1315,7 @@ struct Partition { std::unique_ptr<GnuHashTableSection> gnuHashTab; std::unique_ptr<HashTableSection> hashTab; std::unique_ptr<MemtagAndroidNote> memtagAndroidNote; - std::unique_ptr<MemtagDescriptors> memtagDescriptors; + std::unique_ptr<MemtagGlobalDescriptors> memtagGlobalDescriptors; std::unique_ptr<PackageMetadataNote> packageMetadataNote; 
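Both the memtag descriptor stream above (computeOrWriteULEB128) and the RISC-V R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 handling earlier in this patch operate on ULEB128 fields. As background, a minimal sketch of the encoding and of the overwrite-in-place operation that overwriteULEB128 performs (illustrative only):

#include <cstdint>
#include <vector>

// Standard ULEB128: 7 payload bits per byte, continuation bit set on every byte but the last.
inline void encode_uleb128(std::vector<uint8_t> &out, uint64_t value) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value != 0)
      byte |= 0x80;
    out.push_back(byte);
  } while (value != 0);
}

// Rewrite an already-encoded field without changing its length: follow the existing
// continuation bits, then write the final byte. A return value >= 0x80 means the new
// value did not fit in the existing encoding, which is exactly what the relocation
// code above diagnoses.
inline uint64_t overwrite_uleb128(uint8_t *loc, uint64_t value) {
  while (*loc & 0x80) {
    *loc++ = 0x80 | (value & 0x7f);
    value >>= 7;
  }
  *loc = static_cast<uint8_t>(value);
  return value;
}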
std::unique_ptr<RelocationBaseSection> relaDyn; std::unique_ptr<RelrBaseSection> relrDyn; diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 6264ab1a3da7..af7aaff8a4c0 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -301,6 +301,16 @@ inline void write32(void *p, uint32_t v) { inline void write64(void *p, uint64_t v) { llvm::support::endian::write64(p, v, config->endianness); } + +// Overwrite a ULEB128 value and keep the original length. +inline uint64_t overwriteULEB128(uint8_t *bufLoc, uint64_t val) { + while (*bufLoc & 0x80) { + *bufLoc++ = 0x80 | (val & 0x7f); + val >>= 7; + } + *bufLoc = val; + return val; +} } // namespace elf } // namespace lld diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index a84e4864ab0e..dfec5e07301a 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -291,6 +291,11 @@ static void demoteSymbolsAndComputeIsPreemptible() { } } +bool elf::hasMemtag() { + return config->emachine == EM_AARCH64 && + config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE; +} + // Fully static executables don't support MTE globals at this point in time, as // we currently rely on: // - A dynamic loader to process relocations, and @@ -298,8 +303,7 @@ static void demoteSymbolsAndComputeIsPreemptible() { // This restriction could be removed in future by re-using some of the ideas // that ifuncs use in fully static executables. bool elf::canHaveMemtagGlobals() { - return config->emachine == EM_AARCH64 && - config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE && + return hasMemtag() && (config->relocatable || config->shared || needsInterpSection()); } @@ -397,11 +401,14 @@ template <class ELFT> void elf::createSyntheticSections() { std::make_unique<SymbolTableSection<ELFT>>(*part.dynStrTab); part.dynamic = std::make_unique<DynamicSection<ELFT>>(); - if (canHaveMemtagGlobals()) { + if (hasMemtag()) { part.memtagAndroidNote = std::make_unique<MemtagAndroidNote>(); add(*part.memtagAndroidNote); - part.memtagDescriptors = std::make_unique<MemtagDescriptors>(); - add(*part.memtagDescriptors); + if (canHaveMemtagGlobals()) { + part.memtagGlobalDescriptors = + std::make_unique<MemtagGlobalDescriptors>(); + add(*part.memtagGlobalDescriptors); + } } if (config->androidPackDynRelocs) @@ -1725,8 +1732,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { changed |= part.relaDyn->updateAllocSize(); if (part.relrDyn) changed |= part.relrDyn->updateAllocSize(); - if (part.memtagDescriptors) - changed |= part.memtagDescriptors->updateAllocSize(); + if (part.memtagGlobalDescriptors) + changed |= part.memtagGlobalDescriptors->updateAllocSize(); } const Defined *changedSym = script->assignAddresses(); diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h index eaf021aac42e..aac8176d9098 100644 --- a/lld/ELF/Writer.h +++ b/lld/ELF/Writer.h @@ -57,6 +57,7 @@ bool isMipsN32Abi(const InputFile *f); bool isMicroMips(); bool isMipsR6(); +bool hasMemtag(); bool canHaveMemtagGlobals(); } // namespace lld::elf diff --git a/lldb/include/lldb/API/SBBreakpoint.h b/lldb/include/lldb/API/SBBreakpoint.h index 0bb7c31d74f2..e08df3b6d5ab 100644 --- a/lldb/include/lldb/API/SBBreakpoint.h +++ b/lldb/include/lldb/API/SBBreakpoint.h @@ -112,6 +112,8 @@ public: SBError SetScriptCallbackBody(const char *script_body_text); + LLDB_DEPRECATED_FIXME("Doesn't provide error handling", + "AddNameWithErrorHandling") bool AddName(const char *new_name); SBError AddNameWithErrorHandling(const char *new_name); diff --git a/lldb/include/lldb/Breakpoint/BreakpointIDList.h 
b/lldb/include/lldb/Breakpoint/BreakpointIDList.h index 924cb1f26b8b..6910024695d8 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointIDList.h +++ b/lldb/include/lldb/Breakpoint/BreakpointIDList.h @@ -33,7 +33,7 @@ public: size_t GetSize() const; - const BreakpointID &GetBreakpointIDAtIndex(size_t index) const; + BreakpointID GetBreakpointIDAtIndex(size_t index) const; bool RemoveBreakpointIDAtIndex(size_t index); @@ -48,9 +48,6 @@ public: bool FindBreakpointID(const char *bp_id, size_t *position) const; - void InsertStringArray(llvm::ArrayRef<const char *> string_array, - CommandReturnObject &result); - // Returns a pair consisting of the beginning and end of a breakpoint // ID range expression. If the input string is not a valid specification, // returns an empty pair. @@ -66,7 +63,6 @@ public: private: BreakpointIDArray m_breakpoint_ids; - BreakpointID m_invalid_id; BreakpointIDList(const BreakpointIDList &) = delete; const BreakpointIDList &operator=(const BreakpointIDList &) = delete; diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h index 93f191b49985..89e853ab599d 100644 --- a/lldb/include/lldb/Symbol/CompileUnit.h +++ b/lldb/include/lldb/Symbol/CompileUnit.h @@ -112,10 +112,13 @@ public: /// the compile unit is optimized will be made when /// CompileUnit::GetIsOptimized() is called. /// + /// \param[in] support_files + /// An rvalue list of already parsed support files. /// \see lldb::LanguageType CompileUnit(const lldb::ModuleSP &module_sp, void *user_data, const FileSpec &file_spec, lldb::user_id_t uid, - lldb::LanguageType language, lldb_private::LazyBool is_optimized); + lldb::LanguageType language, lldb_private::LazyBool is_optimized, + SupportFileList &&support_files = {}); /// Add a function to this compile unit. /// @@ -226,6 +229,9 @@ public: /// Return the primary source file associated with this compile unit. const FileSpec &GetPrimaryFile() const { return m_file_spec; } + /// Return the primary source file associated with this compile unit. + void SetPrimaryFile(const FileSpec &fs) { m_file_spec = fs; } + /// Get the line table for the compile unit. /// /// Called by clients and the SymbolFile plug-in. The SymbolFile plug-ins @@ -265,7 +271,13 @@ public: /// /// \return /// A support file list object. - const FileSpecList &GetSupportFiles(); + const SupportFileList &GetSupportFiles(); + + /// Used by plugins that parse the support file list. + SupportFileList &GetSupportFileList() { + m_flags.Set(flagsParsedSupportFiles); + return m_support_files; + } /// Get the compile unit's imported module list. /// @@ -331,8 +343,6 @@ public: /// A line table object pointer that this object now owns. void SetLineTable(LineTable *line_table); - void SetSupportFiles(FileSpecList support_files); - void SetDebugMacros(const DebugMacrosSP &debug_macros); /// Set accessor for the variable list. @@ -410,9 +420,8 @@ protected: std::vector<SourceModule> m_imported_modules; /// The primary file associated with this compile unit. FileSpec m_file_spec; - /// Files associated with this compile unit's line table and - /// declarations. - FileSpecList m_support_files; + /// Files associated with this compile unit's line table and declarations. + SupportFileList m_support_files; /// Line table that will get parsed on demand. std::unique_ptr<LineTable> m_line_table_up; /// Debug macros that will get parsed on demand. 
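Together with the SupportFileList introduced in FileSpecList.h below, the CompileUnit changes above move symbol-file plugins from filling a FileSpecList to appending owned SupportFile entries. A rough sketch of what a plugin's override looks like under the new signature (SymbolFileExample and the header path are hypothetical; only the API names come from this patch):

// Hypothetical override of the virtual whose signature changes below:
// the out-parameter is now a SupportFileList that owns SupportFile objects.
bool SymbolFileExample::ParseSupportFiles(CompileUnit &comp_unit,
                                          SupportFileList &support_files) {
  // The compile unit's primary source file...
  support_files.Append(comp_unit.GetPrimaryFile());
  // ...followed by headers and sources discovered in the debug info.
  support_files.EmplaceBack("/usr/include/example/util.h");
  return true;
}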
diff --git a/lldb/include/lldb/Symbol/SymbolFile.h b/lldb/include/lldb/Symbol/SymbolFile.h index c9a2a647a039..f356f7b789fa 100644 --- a/lldb/include/lldb/Symbol/SymbolFile.h +++ b/lldb/include/lldb/Symbol/SymbolFile.h @@ -197,7 +197,7 @@ public: return false; } virtual bool ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) = 0; + SupportFileList &support_files) = 0; virtual size_t ParseTypes(CompileUnit &comp_unit) = 0; virtual bool ParseIsOptimized(CompileUnit &comp_unit) { return false; } diff --git a/lldb/include/lldb/Symbol/SymbolFileOnDemand.h b/lldb/include/lldb/Symbol/SymbolFileOnDemand.h index cde9f3c3b8ce..4e3009941aa7 100644 --- a/lldb/include/lldb/Symbol/SymbolFileOnDemand.h +++ b/lldb/include/lldb/Symbol/SymbolFileOnDemand.h @@ -81,7 +81,7 @@ public: llvm::function_ref<bool(lldb_private::Module &)>) override; bool ParseSupportFiles(lldb_private::CompileUnit &comp_unit, - lldb_private::FileSpecList &support_files) override; + lldb_private::SupportFileList &support_files) override; bool ParseIsOptimized(lldb_private::CompileUnit &comp_unit) override; diff --git a/lldb/include/lldb/Utility/FileSpecList.h b/lldb/include/lldb/Utility/FileSpecList.h index 77587aa91791..8cccb1949999 100644 --- a/lldb/include/lldb/Utility/FileSpecList.h +++ b/lldb/include/lldb/Utility/FileSpecList.h @@ -17,6 +17,86 @@ namespace lldb_private { class Stream; +/// Wraps either a FileSpec that represents a local file or a source +/// file whose contents is known (for example because it can be +/// reconstructed from debug info), but that hasn't been written to a +/// file yet. +class SupportFile { +protected: + FileSpec m_file_spec; + +public: + SupportFile(const FileSpec &spec) : m_file_spec(spec) {} + SupportFile(const SupportFile &other) = delete; + SupportFile(SupportFile &&other) = default; + virtual ~SupportFile() = default; + bool operator==(const SupportFile &other) { + return m_file_spec == other.m_file_spec; + } + /// Return the file name only. Useful for resolving breakpoints by file name. + const FileSpec &GetSpecOnly() const { return m_file_spec; }; + /// Materialize the file to disk and return the path to that temporary file. + virtual const FileSpec &Materialize() { return m_file_spec; } +}; + +/// A list of support files for a CompileUnit. +class SupportFileList { +public: + SupportFileList(){}; + SupportFileList(const SupportFileList &) = delete; + SupportFileList(SupportFileList &&other) = default; + + typedef std::vector<std::unique_ptr<SupportFile>> collection; + typedef collection::const_iterator const_iterator; + const_iterator begin() const { return m_files.begin(); } + const_iterator end() const { return m_files.end(); } + + void Append(const FileSpec &file) { + return Append(std::make_unique<SupportFile>(file)); + } + void Append(std::unique_ptr<SupportFile> &&file) { + m_files.push_back(std::move(file)); + } + // FIXME: Only used by SymbolFilePDB. Replace with a DenseSet at call site. + bool AppendIfUnique(const FileSpec &file); + size_t GetSize() const { return m_files.size(); } + const FileSpec &GetFileSpecAtIndex(size_t idx) const; + size_t FindFileIndex(size_t idx, const FileSpec &file, bool full) const; + /// Find a compatible file index. + /// + /// Find the index of a compatible file in the file spec list that matches \a + /// file starting \a idx entries into the file spec list. 
A file is considered + /// compatible if: + /// - The file matches exactly (only filename if \a file has no directory) + /// - If \a file is relative and any file in the list has this same suffix + /// - If any file in the list is relative and the relative path is a suffix + /// of \a file + /// + /// This is used to implement better matching for setting breakpoints in + /// source files where an IDE might specify a full path when setting the + /// breakpoint and debug info contains relative paths, if a user specifies + /// a relative path when setting a breakpoint. + /// + /// \param[in] idx + /// An index into the file list. + /// + /// \param[in] file + /// The file specification to search for. + /// + /// \return + /// The index of the file that matches \a file if it is found, + /// else UINT32_MAX is returned. + size_t FindCompatibleIndex(size_t idx, const FileSpec &file) const; + + template <class... Args> void EmplaceBack(Args &&...args) { + m_files.push_back( + std::make_unique<SupportFile>(FileSpec(std::forward<Args>(args)...))); + } + +protected: + collection m_files; ///< A collection of FileSpec objects. +}; + /// \class FileSpecList FileSpecList.h "lldb/Utility/FileSpecList.h" /// A file collection class. /// @@ -114,32 +194,6 @@ public: /// else UINT32_MAX is returned. size_t FindFileIndex(size_t idx, const FileSpec &file, bool full) const; - /// Find a compatible file index. - /// - /// Find the index of a compatible file in the file spec list that matches \a - /// file starting \a idx entries into the file spec list. A file is considered - /// compatible if: - /// - The file matches exactly (only filename if \a file has no directory) - /// - If \a file is relative and any file in the list has this same suffix - /// - If any file in the list is relative and the relative path is a suffix - /// of \a file - /// - /// This is used to implement better matching for setting breakpoints in - /// source files where an IDE might specify a full path when setting the - /// breakpoint and debug info contains relative paths, if a user specifies - /// a relative path when setting a breakpoint. - /// - /// \param[in] idx - /// An index into the file list. - /// - /// \param[in] file - /// The file specification to search for. - /// - /// \return - /// The index of the file that matches \a file if it is found, - /// else UINT32_MAX is returned. - size_t FindCompatibleIndex(size_t idx, const FileSpec &file) const; - /// Get file at index. /// /// Gets a file from the file list. 
If \a idx is not a valid index, an empty diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h index e7ee12868512..5e63ef92fac3 100644 --- a/lldb/include/lldb/Utility/StructuredData.h +++ b/lldb/include/lldb/Utility/StructuredData.h @@ -221,31 +221,17 @@ public: } template <class IntType> - bool GetItemAtIndexAsInteger(size_t idx, IntType &result) const { - ObjectSP value_sp = GetItemAtIndex(idx); - if (value_sp.get()) { + std::optional<IntType> GetItemAtIndexAsInteger(size_t idx) const { + if (auto item_sp = GetItemAtIndex(idx)) { if constexpr (std::numeric_limits<IntType>::is_signed) { - if (auto signed_value = value_sp->GetAsSignedInteger()) { - result = static_cast<IntType>(signed_value->GetValue()); - return true; - } + if (auto *signed_value = item_sp->GetAsSignedInteger()) + return static_cast<IntType>(signed_value->GetValue()); } else { - if (auto unsigned_value = value_sp->GetAsUnsignedInteger()) { - result = static_cast<IntType>(unsigned_value->GetValue()); - return true; - } + if (auto *unsigned_value = item_sp->GetAsUnsignedInteger()) + return static_cast<IntType>(unsigned_value->GetValue()); } } - return false; - } - - template <class IntType> - bool GetItemAtIndexAsInteger(size_t idx, IntType &result, - IntType default_val) const { - bool success = GetItemAtIndexAsInteger(idx, result); - if (!success) - result = default_val; - return success; + return {}; } std::optional<llvm::StringRef> GetItemAtIndexAsString(size_t idx) const { diff --git a/lldb/source/API/SBCompileUnit.cpp b/lldb/source/API/SBCompileUnit.cpp index 3aa65e225d7a..65fdb11032b9 100644 --- a/lldb/source/API/SBCompileUnit.cpp +++ b/lldb/source/API/SBCompileUnit.cpp @@ -171,7 +171,7 @@ uint32_t SBCompileUnit::FindSupportFileIndex(uint32_t start_idx, LLDB_INSTRUMENT_VA(this, start_idx, sb_file, full); if (m_opaque_ptr) { - const FileSpecList &support_files = m_opaque_ptr->GetSupportFiles(); + const SupportFileList &support_files = m_opaque_ptr->GetSupportFiles(); return support_files.FindFileIndex(start_idx, sb_file.ref(), full); } return 0; diff --git a/lldb/source/Breakpoint/BreakpointIDList.cpp b/lldb/source/Breakpoint/BreakpointIDList.cpp index dd16d3b6388c..05c461827cad 100644 --- a/lldb/source/Breakpoint/BreakpointIDList.cpp +++ b/lldb/source/Breakpoint/BreakpointIDList.cpp @@ -20,17 +20,15 @@ using namespace lldb_private; // class BreakpointIDList -BreakpointIDList::BreakpointIDList() - : m_invalid_id(LLDB_INVALID_BREAK_ID, LLDB_INVALID_BREAK_ID) {} +BreakpointIDList::BreakpointIDList() : m_breakpoint_ids() {} BreakpointIDList::~BreakpointIDList() = default; size_t BreakpointIDList::GetSize() const { return m_breakpoint_ids.size(); } -const BreakpointID & -BreakpointIDList::GetBreakpointIDAtIndex(size_t index) const { +BreakpointID BreakpointIDList::GetBreakpointIDAtIndex(size_t index) const { return ((index < m_breakpoint_ids.size()) ? 
m_breakpoint_ids[index] - : m_invalid_id); + : BreakpointID()); } bool BreakpointIDList::RemoveBreakpointIDAtIndex(size_t index) { @@ -82,19 +80,6 @@ bool BreakpointIDList::FindBreakpointID(const char *bp_id_str, return FindBreakpointID(*bp_id, position); } -void BreakpointIDList::InsertStringArray( - llvm::ArrayRef<const char *> string_array, CommandReturnObject &result) { - if(string_array.empty()) - return; - - for (const char *str : string_array) { - auto bp_id = BreakpointID::ParseCanonicalReference(str); - if (bp_id) - m_breakpoint_ids.push_back(*bp_id); - } - result.SetStatus(eReturnStatusSuccessFinishNoResult); -} - // This function takes OLD_ARGS, which is usually the result of breaking the // command string arguments into // an array of space-separated strings, and searches through the arguments for diff --git a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp index a0c628a8e299..ee4cbd50f9ee 100644 --- a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp @@ -65,13 +65,11 @@ BreakpointResolverAddress::SerializeToStructuredData() { new StructuredData::Dictionary()); SectionSP section_sp = m_addr.GetSection(); if (section_sp) { - ModuleSP module_sp = section_sp->GetModule(); - ConstString module_name; - if (module_sp) - module_name.SetCString(module_name.GetCString()); - - options_dict_sp->AddStringItem(GetKey(OptionNames::ModuleName), - module_name.GetCString()); + if (ModuleSP module_sp = section_sp->GetModule()) { + const FileSpec &module_fspec = module_sp->GetFileSpec(); + options_dict_sp->AddStringItem(GetKey(OptionNames::ModuleName), + module_fspec.GetPath().c_str()); + } options_dict_sp->AddIntegerItem(GetKey(OptionNames::AddressOffset), m_addr.GetOffset()); } else { diff --git a/lldb/source/Breakpoint/BreakpointResolverName.cpp b/lldb/source/Breakpoint/BreakpointResolverName.cpp index 82eef43ad6cf..aa86d2a26d11 100644 --- a/lldb/source/Breakpoint/BreakpointResolverName.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverName.cpp @@ -161,14 +161,14 @@ BreakpointResolverSP BreakpointResolverName::CreateFromStructuredData( error.SetErrorString("BRN::CFSD: name entry is not a string."); return nullptr; } - std::underlying_type<FunctionNameType>::type fnt; - success = names_mask_array->GetItemAtIndexAsInteger(i, fnt); - if (!success) { + auto maybe_fnt = names_mask_array->GetItemAtIndexAsInteger< + std::underlying_type<FunctionNameType>::type>(i); + if (!maybe_fnt) { error.SetErrorString("BRN::CFSD: name mask entry is not an integer."); return nullptr; } names.push_back(std::string(*maybe_name)); - name_masks.push_back(static_cast<FunctionNameType>(fnt)); + name_masks.push_back(static_cast<FunctionNameType>(*maybe_fnt)); } std::shared_ptr<BreakpointResolverName> resolver_sp = diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp index 63492590d32d..f9ba68eda3ff 100644 --- a/lldb/source/Commands/CommandObjectBreakpoint.cpp +++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -2494,7 +2494,9 @@ void CommandObjectMultiwordBreakpoint::VerifyIDs( // NOW, convert the list of breakpoint id strings in TEMP_ARGS into an actual // BreakpointIDList: - valid_ids->InsertStringArray(temp_args.GetArgumentArrayRef(), result); + for (llvm::StringRef temp_arg : temp_args.GetArgumentArrayRef()) + if (auto bp_id = BreakpointID::ParseCanonicalReference(temp_arg)) + valid_ids->AddBreakpointID(*bp_id); // At this point, 
all of the breakpoint ids that the user passed in have // been converted to breakpoint IDs and put into valid_ids. diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp index db158a7f5263..cabf6f0436f1 100644 --- a/lldb/source/Commands/CommandObjectSource.cpp +++ b/lldb/source/Commands/CommandObjectSource.cpp @@ -204,7 +204,7 @@ protected: if (cu) { assert(file_spec.GetFilename().AsCString()); bool has_path = (file_spec.GetDirectory().AsCString() != nullptr); - const FileSpecList &cu_file_list = cu->GetSupportFiles(); + const SupportFileList &cu_file_list = cu->GetSupportFiles(); size_t file_idx = cu_file_list.FindFileIndex(0, file_spec, has_path); if (file_idx != UINT32_MAX) { // Update the file to how it appears in the CU. diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index aa89c93c8d05..2180f29f3694 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -164,7 +164,7 @@ void ModuleListProperties::UpdateSymlinkMappings() { llvm::sys::ScopedWriter lock(m_symlink_paths_mutex); const bool notify = false; m_symlink_paths.Clear(notify); - for (FileSpec symlink : list) { + for (auto symlink : list) { FileSpec resolved; Status status = FileSystem::Instance().Readlink(symlink, resolved); if (status.Success()) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp index 68bdd96e8adb..30bc81c9ed8c 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp @@ -488,18 +488,18 @@ CppModuleConfiguration GetModuleConfig(lldb::LanguageType language, // Build a list of files we need to analyze to build the configuration. FileSpecList files; - for (const FileSpec &f : sc.comp_unit->GetSupportFiles()) - files.AppendIfUnique(f); + for (auto &f : sc.comp_unit->GetSupportFiles()) + files.AppendIfUnique(f->Materialize()); // We also need to look at external modules in the case of -gmodules as they // contain the support files for libc++ and the C library. 
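As a caller-side illustration of the optional-returning StructuredData::Array accessors introduced earlier in this import, here is a minimal sketch; the array contents and the helper name are hypothetical:

#include "lldb/Utility/StructuredData.h"
#include <cstdint>
#include <cstdio>

using namespace lldb_private;

// Hypothetical helper: with the new interface, "present and convertible"
// travels in the returned std::optional instead of a bool plus out-parameter.
static void DumpArrayEntry(const StructuredData::Array &array, size_t idx) {
  if (auto value = array.GetItemAtIndexAsInteger<uint64_t>(idx))
    printf("entry %zu = %llu\n", idx, (unsigned long long)*value);
  else if (auto str = array.GetItemAtIndexAsString(idx))
    printf("entry %zu = %s\n", idx, str->str().c_str());
  else
    printf("entry %zu is neither an integer nor a string\n", idx);
}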
llvm::DenseSet<SymbolFile *> visited_symbol_files; sc.comp_unit->ForEachExternalModule( visited_symbol_files, [&files](Module &module) { for (std::size_t i = 0; i < module.GetNumCompileUnits(); ++i) { - const FileSpecList &support_files = + const SupportFileList &support_files = module.GetCompileUnitAtIndex(i)->GetSupportFiles(); - for (const FileSpec &f : support_files) { - files.AppendIfUnique(f); + for (auto &f : support_files) { + files.AppendIfUnique(f->Materialize()); } } return false; @@ -508,7 +508,7 @@ CppModuleConfiguration GetModuleConfig(lldb::LanguageType language, LLDB_LOG(log, "[C++ module config] Found {0} support files to analyze", files.GetSize()); if (log && log->GetVerbose()) { - for (const FileSpec &f : files) + for (auto &f : files) LLDB_LOGV(log, "[C++ module config] Analyzing support file: {0}", f.GetPath()); } diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp index 62443d1290dc..f43a04488230 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp @@ -134,9 +134,9 @@ bool CppModuleConfiguration::hasValidConfig() { CppModuleConfiguration::CppModuleConfiguration( const FileSpecList &support_files, const llvm::Triple &triple) { // Analyze all files we were given to build the configuration. - bool error = !llvm::all_of(support_files, - std::bind(&CppModuleConfiguration::analyzeFile, - this, std::placeholders::_1, triple)); + bool error = !llvm::all_of(support_files, [&](auto &file) { + return CppModuleConfiguration::analyzeFile(file, triple); + }); // If we have a valid configuration at this point, set the // include directories and module list that should be used. 
if (!error && hasValidConfig()) { diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp index 2a35256a6fb0..72293c5331f4 100644 --- a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp +++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp @@ -592,9 +592,10 @@ addr_t InstrumentationRuntimeTSan::GetFirstNonInternalFramePc( if (skip_one_frame && i == 0) continue; - addr_t addr; - if (!trace_array->GetItemAtIndexAsInteger(i, addr)) + auto maybe_addr = trace_array->GetItemAtIndexAsInteger<addr_t>(i); + if (!maybe_addr) continue; + addr_t addr = *maybe_addr; lldb_private::Address so_addr; if (!process_sp->GetTarget().GetSectionLoadList().ResolveLoadAddress( diff --git a/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp b/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp index 5326a73166e7..1688fb27430a 100644 --- a/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp +++ b/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp @@ -35,6 +35,7 @@ using namespace lldb_private; LLDB_PLUGIN_DEFINE(JITLoaderGDB) +namespace { // Debug Interface Structures enum jit_actions_t { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN }; @@ -52,7 +53,6 @@ template <typename ptr_t> struct jit_descriptor { ptr_t first_entry; // pointer }; -namespace { enum EnableJITLoaderGDB { eEnableJITLoaderGDBDefault, eEnableJITLoaderGDBOn, diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index 729d6af02402..47c8074adc5b 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -278,13 +278,14 @@ bool SymbolFileBreakpad::ParseLineTable(CompileUnit &comp_unit) { } bool SymbolFileBreakpad::ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) { + SupportFileList &support_files) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); CompUnitData &data = m_cu_data->GetEntryRef(comp_unit.GetID()).data; if (!data.support_files) ParseLineTableAndSupportFiles(comp_unit, data); - support_files = std::move(*data.support_files); + for (auto &fs : *data.support_files) + support_files.Append(fs); return true; } diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h index 214fbdd3ff3a..41e4e3b25801 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h @@ -73,7 +73,7 @@ public: bool ParseDebugMacros(CompileUnit &comp_unit) override { return false; } bool ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) override; + SupportFileList &support_files) override; size_t ParseTypes(CompileUnit &cu) override { return 0; } bool ParseImportedModules( @@ -195,7 +195,6 @@ private: Bookmark bookmark; std::optional<FileSpecList> support_files; std::unique_ptr<LineTable> line_table_up; - }; uint32_t CalculateNumCompileUnits() override; diff --git a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.h b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.h index f111937dbd6e..3a80138fffbc 100644 --- a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.h +++ b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.h @@ -66,7 +66,7 @@ public: bool ParseDebugMacros(CompileUnit &comp_unit) override { return 
false; } bool ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) override { + SupportFileList &support_files) override { return false; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 3e08f2550081..54d06b1115a2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -142,6 +142,18 @@ static bool ShouldIgnoreArtificialField(llvm::StringRef FieldName) { || FieldName.starts_with("_vptr."); } +/// Returns true for C++ constructs represented by clang::CXXRecordDecl +static bool TagIsRecordType(dw_tag_t tag) { + switch (tag) { + case DW_TAG_class_type: + case DW_TAG_structure_type: + case DW_TAG_union_type: + return true; + default: + return false; + } +} + TypeSP DWARFASTParserClang::ParseTypeFromClangModule(const SymbolContext &sc, const DWARFDIE &die, Log *log) { @@ -2150,6 +2162,7 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, SymbolFileDWARF *dwarf = die.GetDWARF(); ClangASTImporter::LayoutInfo layout_info; + std::vector<DWARFDIE> contained_type_dies; if (die.HasChildren()) { const bool type_is_objc_object_or_interface = @@ -2175,7 +2188,8 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, DelayedPropertyList delayed_properties; ParseChildMembers(die, clang_type, bases, member_function_dies, - delayed_properties, default_accessibility, layout_info); + contained_type_dies, delayed_properties, + default_accessibility, layout_info); // Now parse any methods if there were any... for (const DWARFDIE &die : member_function_dies) @@ -2231,6 +2245,12 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, if (record_decl) GetClangASTImporter().SetRecordLayout(record_decl, layout_info); } + // Now parse all contained types inside of the class. We make forward + // declarations to all classes, but we need the CXXRecordDecl to have decls + // for all contained types because we don't get asked for them via the + // external AST support. + for (const DWARFDIE &die : contained_type_dies) + dwarf->ResolveType(die); return (bool)clang_type; } @@ -3110,6 +3130,7 @@ bool DWARFASTParserClang::ParseChildMembers( const DWARFDIE &parent_die, CompilerType &class_clang_type, std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes, std::vector<DWARFDIE> &member_function_dies, + std::vector<DWARFDIE> &contained_type_dies, DelayedPropertyList &delayed_properties, const AccessType default_accessibility, ClangASTImporter::LayoutInfo &layout_info) { @@ -3159,6 +3180,8 @@ bool DWARFASTParserClang::ParseChildMembers( break; default: + if (llvm::dwarf::isType(tag)) + contained_type_dies.push_back(die); break; } } @@ -3293,12 +3316,19 @@ clang::Decl *DWARFASTParserClang::GetClangDeclForDIE(const DWARFDIE &die) { return nullptr; switch (die.Tag()) { - case DW_TAG_variable: case DW_TAG_constant: case DW_TAG_formal_parameter: case DW_TAG_imported_declaration: case DW_TAG_imported_module: break; + case DW_TAG_variable: + // This means 'die' is a C++ static data member. + // We don't want to create decls for such members + // here. 
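To make the static-data-member case above concrete, this is the source-level shape it describes; illustrative C++ only, and it assumes a compiler that, as the comment notes, represents the in-class declaration as a DW_TAG_variable nested under the class's record DIE:

struct Foo {
  static int s; // in-class declaration: a DW_TAG_variable child of Foo's
                // DW_TAG_structure_type DIE in such debug info, which is the
                // shape the parent-tag check below filters out
};
int Foo::s = 0; // out-of-class definition at namespace scope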
+ if (auto parent = die.GetParent(); + parent.IsValid() && TagIsRecordType(parent.Tag())) + return nullptr; + break; default: return nullptr; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 3e28e54d6220..8d4af203bb28 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -175,6 +175,7 @@ protected: lldb_private::CompilerType &class_compiler_type, std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes, std::vector<lldb_private::plugin::dwarf::DWARFDIE> &member_function_dies, + std::vector<lldb_private::plugin::dwarf::DWARFDIE> &contained_type_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index b1c323b101ce..20c07a94b507 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "Plugins/SymbolFile/DWARF/DWARFIndex.h" +#include "DWARFDebugInfoEntry.h" +#include "DWARFDeclContext.h" #include "Plugins/Language/ObjC/ObjCLanguage.h" #include "Plugins/SymbolFile/DWARF/DWARFDIE.h" #include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h" @@ -112,3 +114,21 @@ void DWARFIndex::ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const { "bad die {0:x16} for '{1}')\n", ref.die_offset(), name.str().c_str()); } + +void DWARFIndex::GetFullyQualifiedType( + const DWARFDeclContext &context, + llvm::function_ref<bool(DWARFDIE die)> callback) { + GetTypes(context, [&](DWARFDIE die) { + return GetFullyQualifiedTypeImpl(context, die, callback); + }); +} + +bool DWARFIndex::GetFullyQualifiedTypeImpl( + const DWARFDeclContext &context, DWARFDIE die, + llvm::function_ref<bool(DWARFDIE die)> callback) { + DWARFDeclContext dwarf_decl_ctx = + die.GetDIE()->GetDWARFDeclContext(die.GetCU()); + if (dwarf_decl_ctx == context) + return callback(die); + return true; +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index 9aadeddbb217..0551b07100a9 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -53,6 +53,14 @@ public: llvm::function_ref<bool(DWARFDIE die)> callback) = 0; virtual void GetTypes(const DWARFDeclContext &context, llvm::function_ref<bool(DWARFDIE die)> callback) = 0; + + /// Finds all DIEs whose fully qualified name matches `context`. A base + /// implementation is provided, and it uses the entire CU to check the DIE + /// parent hierarchy. Specializations should override this if they are able + /// to provide a faster implementation. + virtual void + GetFullyQualifiedType(const DWARFDeclContext &context, + llvm::function_ref<bool(DWARFDIE die)> callback); virtual void GetNamespaces(ConstString name, llvm::function_ref<bool(DWARFDIE die)> callback) = 0; @@ -102,6 +110,12 @@ protected: } void ReportInvalidDIERef(DIERef ref, llvm::StringRef name) const; + + /// Implementation of `GetFullyQualifiedType` to check a single entry, + /// shareable with derived classes. 
+ bool + GetFullyQualifiedTypeImpl(const DWARFDeclContext &context, DWARFDIE die, + llvm::function_ref<bool(DWARFDIE die)> callback); }; } // namespace dwarf } // namespace lldb_private::plugin diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index 7c253553d57b..b718f98340a7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -48,7 +48,7 @@ DebugNamesDWARFIndex::GetUnits(const DebugNames &debug_names) { } std::optional<DIERef> -DebugNamesDWARFIndex::ToDIERef(const DebugNames::Entry &entry) { +DebugNamesDWARFIndex::ToDIERef(const DebugNames::Entry &entry) const { // Look for a DWARF unit offset (CU offset or local TU offset) as they are // both offsets into the .debug_info section. std::optional<uint64_t> unit_offset = entry.getCUOffset(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index 7ce630a56137..cca0913c4124 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -79,7 +79,7 @@ private: std::unique_ptr<DebugNames> m_debug_names_up; ManualDWARFIndex m_fallback; - std::optional<DIERef> ToDIERef(const DebugNames::Entry &entry); + std::optional<DIERef> ToDIERef(const DebugNames::Entry &entry) const; bool ProcessEntry(const DebugNames::Entry &entry, llvm::function_ref<bool(DWARFDIE die)> callback); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 505ea29ca4d4..737da7798b82 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Format.h" #include "llvm/Support/Threading.h" @@ -209,17 +210,14 @@ GetFileByIndex(const llvm::DWARFDebugLine::Prologue &prologue, size_t idx, return std::move(rel_path); } -static FileSpecList -ParseSupportFilesFromPrologue(const lldb::ModuleSP &module, - const llvm::DWARFDebugLine::Prologue &prologue, - FileSpec::Style style, - llvm::StringRef compile_dir = {}) { - FileSpecList support_files; - +static void ParseSupportFilesFromPrologue( + SupportFileList &support_files, const lldb::ModuleSP &module, + const llvm::DWARFDebugLine::Prologue &prologue, FileSpec::Style style, + llvm::StringRef compile_dir = {}) { // Handle the case where there are no files first to avoid having to special // case this later. if (prologue.FileNames.empty()) - return support_files; + return; // Before DWARF v5, the line table indexes were one based. const bool is_one_based = prologue.getVersion() < 5; @@ -235,6 +233,53 @@ ParseSupportFilesFromPrologue(const lldb::ModuleSP &module, for (size_t idx = first_file_idx; idx <= last_file_idx; ++idx) { std::string remapped_file; if (auto file_path = GetFileByIndex(prologue, idx, compile_dir, style)) { + auto entry = prologue.getFileNameEntry(idx); + auto source = entry.Source.getAsCString(); + if (!source) + consumeError(source.takeError()); + else { + llvm::StringRef source_ref(*source); + if (!source_ref.empty()) { + /// Wrap a path for an in-DWARF source file. Lazily write it + /// to disk when Materialize() is called. 
+ struct LazyDWARFSourceFile : public SupportFile { + LazyDWARFSourceFile(const FileSpec &fs, llvm::StringRef source, + FileSpec::Style style) + : SupportFile(fs), source(source), style(style) {} + FileSpec tmp_file; + /// The file contents buffer. + llvm::StringRef source; + /// Deletes the temporary file at the end. + std::unique_ptr<llvm::FileRemover> remover; + FileSpec::Style style; + + /// Write the file contents to a temporary file. + const FileSpec &Materialize() override { + if (tmp_file) + return tmp_file; + llvm::SmallString<0> name; + int fd; + auto orig_name = m_file_spec.GetFilename().GetStringRef(); + auto ec = llvm::sys::fs::createTemporaryFile( + "", llvm::sys::path::filename(orig_name, style), fd, name); + if (ec || fd <= 0) { + LLDB_LOG(GetLog(DWARFLog::DebugInfo), + "Could not create temporary file"); + return tmp_file; + } + remover = std::make_unique<llvm::FileRemover>(name); + NativeFile file(fd, File::eOpenOptionWriteOnly, true); + size_t num_bytes = source.size(); + file.Write(source.data(), num_bytes); + tmp_file.SetPath(name); + return tmp_file; + } + }; + support_files.Append(std::make_unique<LazyDWARFSourceFile>( + FileSpec(*file_path), *source, style)); + continue; + } + } if (auto remapped = module->RemapSourceFile(llvm::StringRef(*file_path))) remapped_file = *remapped; else @@ -251,8 +296,6 @@ ParseSupportFilesFromPrologue(const lldb::ModuleSP &module, // Unconditionally add an entry, so the indices match up. support_files.EmplaceBack(remapped_file, style, checksum); } - - return support_files; } void SymbolFileDWARF::Initialize() { @@ -744,12 +787,13 @@ lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFCompileUnit &dwarf_cu) { ModuleSP module_sp(m_objfile_sp->GetModule()); if (module_sp) { auto initialize_cu = [&](const FileSpec &file_spec, - LanguageType cu_language) { + LanguageType cu_language, + SupportFileList &&support_files = {}) { BuildCuTranslationTable(); cu_sp = std::make_shared<CompileUnit>( module_sp, &dwarf_cu, file_spec, *GetDWARFUnitIndex(dwarf_cu.GetID()), cu_language, - eLazyBoolCalculate); + eLazyBoolCalculate, std::move(support_files)); dwarf_cu.SetUserData(cu_sp.get()); @@ -775,15 +819,13 @@ lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFCompileUnit &dwarf_cu) { // file is also the name of the compile unit. This // allows us to avoid loading the non-skeleton unit, // which may be in a separate DWO file. 
- FileSpecList support_files; + SupportFileList support_files; if (!ParseSupportFiles(dwarf_cu, module_sp, support_files)) return false; if (support_files.GetSize() == 0) return false; - initialize_cu(support_files.GetFileSpecAtIndex(0), - eLanguageTypeUnknown); - cu_sp->SetSupportFiles(std::move(support_files)); + eLanguageTypeUnknown, std::move(support_files)); return true; }; @@ -1029,7 +1071,7 @@ bool SymbolFileDWARF::ForEachExternalModule( } bool SymbolFileDWARF::ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) { + SupportFileList &support_files) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); DWARFUnit *dwarf_cu = GetDWARFCompileUnit(&comp_unit); if (!dwarf_cu) @@ -1038,13 +1080,12 @@ bool SymbolFileDWARF::ParseSupportFiles(CompileUnit &comp_unit, if (!ParseSupportFiles(*dwarf_cu, comp_unit.GetModule(), support_files)) return false; - comp_unit.SetSupportFiles(support_files); return true; } bool SymbolFileDWARF::ParseSupportFiles(DWARFUnit &dwarf_cu, const ModuleSP &module, - FileSpecList &support_files) { + SupportFileList &support_files) { dw_offset_t offset = dwarf_cu.GetLineTableOffset(); if (offset == DW_INVALID_OFFSET) @@ -1057,8 +1098,8 @@ bool SymbolFileDWARF::ParseSupportFiles(DWARFUnit &dwarf_cu, return false; std::string comp_dir = dwarf_cu.GetCompilationDirectory().GetPath(); - support_files = ParseSupportFilesFromPrologue( - module, prologue, dwarf_cu.GetPathStyle(), comp_dir); + ParseSupportFilesFromPrologue(support_files, module, prologue, + dwarf_cu.GetPathStyle(), comp_dir); return true; } @@ -1070,24 +1111,27 @@ FileSpec SymbolFileDWARF::GetFile(DWARFUnit &unit, size_t file_idx) { } auto &tu = llvm::cast<DWARFTypeUnit>(unit); - return GetTypeUnitSupportFiles(tu).GetFileSpecAtIndex(file_idx); + if (const SupportFileList *support_files = GetTypeUnitSupportFiles(tu)) + return support_files->GetFileSpecAtIndex(file_idx); + return {}; } -const FileSpecList & +const SupportFileList * SymbolFileDWARF::GetTypeUnitSupportFiles(DWARFTypeUnit &tu) { - static FileSpecList empty_list; + static SupportFileList empty_list; dw_offset_t offset = tu.GetLineTableOffset(); if (offset == DW_INVALID_OFFSET || offset == llvm::DenseMapInfo<dw_offset_t>::getEmptyKey() || offset == llvm::DenseMapInfo<dw_offset_t>::getTombstoneKey()) - return empty_list; + return nullptr; // Many type units can share a line table, so parse the support file list // once, and cache it based on the offset field. 
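For orientation, a hedged sketch of the caller-side pattern for the SupportFileList API these changes migrate to: entries behave like SupportFile handles, so a FileSpec describing the entry is available without touching the filesystem, while Materialize() is what may write an in-DWARF source to a temporary file before returning a path. The helper name below is hypothetical:

#include "lldb/Symbol/CompileUnit.h"
#include "lldb/Utility/FileSpecList.h"

using namespace lldb_private;

// Hypothetical helper mirroring the pattern used in these changes: walk a
// compile unit's SupportFileList and collect concrete FileSpecs. Calling
// Materialize() is what may write an in-DWARF source to a temporary file;
// GetSpecOnly() would only describe the entry without materializing it.
static void CollectSupportFiles(CompileUnit &comp_unit, FileSpecList &out) {
  const SupportFileList &support_files = comp_unit.GetSupportFiles();
  for (auto &support_file : support_files)
    out.AppendIfUnique(support_file->Materialize());
}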
auto iter_bool = m_type_unit_support_files.try_emplace(offset); - FileSpecList &list = iter_bool.first->second; + std::unique_ptr<SupportFileList> &list = iter_bool.first->second; if (iter_bool.second) { + list = std::make_unique<SupportFileList>(); uint64_t line_table_offset = offset; llvm::DWARFDataExtractor data = m_context.getOrLoadLineData().GetAsLLVMDWARF(); @@ -1101,14 +1145,13 @@ SymbolFileDWARF::GetTypeUnitSupportFiles(DWARFTypeUnit &tu) { }; ElapsedTime elapsed(m_parse_time); llvm::Error error = prologue.parse(data, &line_table_offset, report, ctx); - if (error) { + if (error) report(std::move(error)); - } else { - list = ParseSupportFilesFromPrologue(GetObjectFile()->GetModule(), - prologue, tu.GetPathStyle()); - } + else + ParseSupportFilesFromPrologue(*list, GetObjectFile()->GetModule(), + prologue, tu.GetPathStyle()); } - return list; + return list.get(); } bool SymbolFileDWARF::ParseIsOptimized(CompileUnit &comp_unit) { @@ -3095,7 +3138,7 @@ SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) { } const DWARFDeclContext die_dwarf_decl_ctx = GetDWARFDeclContext(die); - m_index->GetTypes(die_dwarf_decl_ctx, [&](DWARFDIE type_die) { + m_index->GetFullyQualifiedType(die_dwarf_decl_ctx, [&](DWARFDIE type_die) { // Make sure type_die's language matches the type system we are // looking for. We don't want to find a "Foo" type from Java if we // are looking for a "Foo" type for C, C++, ObjC, or ObjC++. @@ -3122,9 +3165,8 @@ SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) { return true; } - DWARFDeclContext type_dwarf_decl_ctx = GetDWARFDeclContext(type_die); - if (log) { + DWARFDeclContext type_dwarf_decl_ctx = GetDWARFDeclContext(type_die); GetObjectFile()->GetModule()->LogMessage( log, "SymbolFileDWARF::" @@ -3134,10 +3176,6 @@ SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext(const DWARFDIE &die) { type_dwarf_decl_ctx.GetQualifiedName()); } - // Make sure the decl contexts match all the way up - if (die_dwarf_decl_ctx != type_dwarf_decl_ctx) - return true; - Type *resolved_type = ResolveType(type_die, false); if (!resolved_type || resolved_type == DIE_IS_BEING_PARSED) return true; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 78819edd0062..26a9502f90aa 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -123,7 +123,7 @@ public: llvm::function_ref<bool(Module &)>) override; bool ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) override; + SupportFileList &support_files) override; bool ParseIsOptimized(CompileUnit &comp_unit) override; @@ -396,7 +396,7 @@ protected: bool *type_is_new); bool ParseSupportFiles(DWARFUnit &dwarf_cu, const lldb::ModuleSP &module, - FileSpecList &support_files); + SupportFileList &support_files); lldb::VariableSP ParseVariableDIE(const SymbolContext &sc, const DWARFDIE &die, @@ -489,7 +489,7 @@ protected: void FindDwpSymbolFile(); - const FileSpecList &GetTypeUnitSupportFiles(DWARFTypeUnit &tu); + const SupportFileList *GetTypeUnitSupportFiles(DWARFTypeUnit &tu); void InitializeFirstCodeAddressRecursive(const SectionList §ion_list); @@ -529,7 +529,8 @@ protected: DIEToVariableSP m_die_to_variable_sp; DIEToCompilerType m_forward_decl_die_to_compiler_type; CompilerTypeToDIE m_forward_decl_compiler_type_to_die; - llvm::DenseMap<dw_offset_t, FileSpecList> m_type_unit_support_files; + llvm::DenseMap<dw_offset_t, 
std::unique_ptr<SupportFileList>> + m_type_unit_support_files; std::vector<uint32_t> m_lldb_cu_to_dwarf_unit; /// DWARF does not provide a good way for traditional (concatenating) linkers /// to invalidate debug info describing dead-stripped code. These linkers will diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index e5b59460cb85..9094a5e21e69 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -725,8 +725,8 @@ bool SymbolFileDWARFDebugMap::ForEachExternalModule( return false; } -bool SymbolFileDWARFDebugMap::ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) { +bool SymbolFileDWARFDebugMap::ParseSupportFiles( + CompileUnit &comp_unit, SupportFileList &support_files) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); SymbolFileDWARF *oso_dwarf = GetSymbolFile(comp_unit); if (oso_dwarf) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index cd0a4bb6e41c..d639ee500080 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -74,7 +74,7 @@ public: llvm::function_ref<bool(Module &)>) override; bool ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) override; + SupportFileList &support_files) override; bool ParseIsOptimized(CompileUnit &comp_unit) override; diff --git a/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h b/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h index 4dd0d65da465..3dd33b3dc82f 100644 --- a/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h +++ b/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h @@ -59,7 +59,7 @@ public: bool ParseDebugMacros(CompileUnit &comp_unit) override { return false; } bool ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) override { + SupportFileList &support_files) override { return false; } diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index ad0801339936..9234768323e7 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -1369,7 +1369,7 @@ SymbolFileNativePDB::GetFileIndex(const CompilandIndexItem &cii, } bool SymbolFileNativePDB::ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) { + SupportFileList &support_files) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); PdbSymUid cu_id(comp_unit.GetID()); lldbassert(cu_id.kind() == PdbSymUidKind::Compiland); @@ -1416,7 +1416,7 @@ void SymbolFileNativePDB::ParseInlineSite(PdbCompilandSymId id, return; InlineeSourceLine inlinee_line = iter->second; - const FileSpecList &files = comp_unit->GetSupportFiles(); + const SupportFileList &files = comp_unit->GetSupportFiles(); FileSpec decl_file; llvm::Expected<uint32_t> file_index_or_err = GetFileIndex(*cii, inlinee_line.Header->FileID); @@ -2265,7 +2265,8 @@ void SymbolFileNativePDB::BuildParentMap() { } for (TypeIndex fwd : fwd_keys) { TypeIndex full = forward_to_full[fwd]; - m_parent_types[full] = m_parent_types[fwd]; + TypeIndex parent_idx = m_parent_types[fwd]; + m_parent_types[full] = parent_idx; } for (TypeIndex full : full_keys) { TypeIndex fwd = 
full_to_forward[full]; diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h index 9d0458cf7ebf..82577771f355 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h @@ -94,7 +94,7 @@ public: bool ParseDebugMacros(lldb_private::CompileUnit &comp_unit) override; bool ParseSupportFiles(lldb_private::CompileUnit &comp_unit, - FileSpecList &support_files) override; + SupportFileList &support_files) override; size_t ParseTypes(lldb_private::CompileUnit &comp_unit) override; bool ParseImportedModules( diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 9e1cd8360660..b26beecc6d12 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -365,7 +365,7 @@ bool SymbolFilePDB::ParseDebugMacros(CompileUnit &comp_unit) { } bool SymbolFilePDB::ParseSupportFiles( - CompileUnit &comp_unit, lldb_private::FileSpecList &support_files) { + CompileUnit &comp_unit, lldb_private::SupportFileList &support_files) { // In theory this is unnecessary work for us, because all of this information // is easily (and quickly) accessible from DebugInfoPDB, so caching it a diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h index 01851f1418f3..ea495c575f1f 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h @@ -70,7 +70,7 @@ public: bool ParseDebugMacros(lldb_private::CompileUnit &comp_unit) override; bool ParseSupportFiles(lldb_private::CompileUnit &comp_unit, - lldb_private::FileSpecList &support_files) override; + lldb_private::SupportFileList &support_files) override; size_t ParseTypes(lldb_private::CompileUnit &comp_unit) override; diff --git a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp index 6e4c6439974e..8c17017442b1 100644 --- a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp +++ b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp @@ -211,7 +211,7 @@ bool SymbolFileSymtab::ParseDebugMacros(CompileUnit &comp_unit) { } bool SymbolFileSymtab::ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) { + SupportFileList &support_files) { return false; } diff --git a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h index 1bbc4de9c942..a36311525334 100644 --- a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h +++ b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h @@ -57,7 +57,7 @@ public: bool ParseDebugMacros(lldb_private::CompileUnit &comp_unit) override; bool ParseSupportFiles(lldb_private::CompileUnit &comp_unit, - lldb_private::FileSpecList &support_files) override; + lldb_private::SupportFileList &support_files) override; size_t ParseTypes(lldb_private::CompileUnit &comp_unit) override; diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index c9796973940a..a6b6c8e57eec 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -28,10 +28,11 @@ CompileUnit::CompileUnit(const lldb::ModuleSP &module_sp, void *user_data, CompileUnit::CompileUnit(const lldb::ModuleSP &module_sp, void *user_data, const FileSpec &fspec, 
const lldb::user_id_t cu_sym_id, lldb::LanguageType language, - lldb_private::LazyBool is_optimized) + lldb_private::LazyBool is_optimized, + SupportFileList &&support_files) : ModuleChild(module_sp), UserID(cu_sym_id), m_user_data(user_data), m_language(language), m_flags(0), m_file_spec(fspec), - m_is_optimized(is_optimized) { + m_support_files(std::move(support_files)), m_is_optimized(is_optimized) { if (language != eLanguageTypeUnknown) m_flags.Set(flagsParsedLanguage); assert(module_sp); @@ -178,10 +179,6 @@ void CompileUnit::SetLineTable(LineTable *line_table) { m_line_table_up.reset(line_table); } -void CompileUnit::SetSupportFiles(FileSpecList support_files) { - m_support_files = std::move(support_files); -} - DebugMacros *CompileUnit::GetDebugMacros() { if (m_debug_macros_sp.get() == nullptr) { if (m_flags.IsClear(flagsParsedDebugMacros)) { @@ -213,7 +210,7 @@ VariableListSP CompileUnit::GetVariableList(bool can_create) { return m_variables; } -std::vector<uint32_t> FindFileIndexes(const FileSpecList &files, +std::vector<uint32_t> FindFileIndexes(const SupportFileList &files, const FileSpec &file) { std::vector<uint32_t> result; uint32_t idx = -1; @@ -411,7 +408,7 @@ bool CompileUnit::ForEachExternalModule( return false; } -const FileSpecList &CompileUnit::GetSupportFiles() { +const SupportFileList &CompileUnit::GetSupportFiles() { if (m_support_files.GetSize() == 0) { if (m_flags.IsClear(flagsParsedSupportFiles)) { m_flags.Set(flagsParsedSupportFiles); diff --git a/lldb/source/Symbol/SymbolFileOnDemand.cpp b/lldb/source/Symbol/SymbolFileOnDemand.cpp index 33995252bfe2..bdb1951d5125 100644 --- a/lldb/source/Symbol/SymbolFileOnDemand.cpp +++ b/lldb/source/Symbol/SymbolFileOnDemand.cpp @@ -115,7 +115,7 @@ bool SymbolFileOnDemand::ForEachExternalModule( } bool SymbolFileOnDemand::ParseSupportFiles(CompileUnit &comp_unit, - FileSpecList &support_files) { + SupportFileList &support_files) { LLDB_LOG(GetLog(), "[{0}] {1} is not skipped: explicitly allowed to support breakpoint", GetSymbolFileName(), __FUNCTION__); diff --git a/lldb/source/Target/DynamicRegisterInfo.cpp b/lldb/source/Target/DynamicRegisterInfo.cpp index 7469c1d4259a..1a817449fa95 100644 --- a/lldb/source/Target/DynamicRegisterInfo.cpp +++ b/lldb/source/Target/DynamicRegisterInfo.cpp @@ -349,10 +349,8 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict, const size_t num_regs = invalidate_reg_list->GetSize(); if (num_regs > 0) { for (uint32_t idx = 0; idx < num_regs; ++idx) { - uint64_t invalidate_reg_num; - std::optional<llvm::StringRef> maybe_invalidate_reg_name = - invalidate_reg_list->GetItemAtIndexAsString(idx); - if (maybe_invalidate_reg_name) { + if (auto maybe_invalidate_reg_name = + invalidate_reg_list->GetItemAtIndexAsString(idx)) { const RegisterInfo *invalidate_reg_info = GetRegisterInfo(*maybe_invalidate_reg_name); if (invalidate_reg_info) { @@ -365,10 +363,11 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict, "\"%s\" while parsing register \"%s\"\n", maybe_invalidate_reg_name->str().c_str(), reg_info.name); } - } else if (invalidate_reg_list->GetItemAtIndexAsInteger( - idx, invalidate_reg_num)) { - if (invalidate_reg_num != UINT64_MAX) - m_invalidate_regs_map[i].push_back(invalidate_reg_num); + } else if (auto maybe_invalidate_reg_num = + invalidate_reg_list->GetItemAtIndexAsInteger<uint64_t>( + idx)) { + if (*maybe_invalidate_reg_num != UINT64_MAX) + m_invalidate_regs_map[i].push_back(*maybe_invalidate_reg_num); else printf("error: 'invalidate-regs' 
list value wasn't a valid " "integer\n"); diff --git a/lldb/source/Utility/FileSpecList.cpp b/lldb/source/Utility/FileSpecList.cpp index e3d8ea650c75..8d2cf81efe5b 100644 --- a/lldb/source/Utility/FileSpecList.cpp +++ b/lldb/source/Utility/FileSpecList.cpp @@ -37,6 +37,19 @@ bool FileSpecList::AppendIfUnique(const FileSpec &file_spec) { return false; } +// FIXME: Replace this with a DenseSet at the call site. It is inefficient. +bool SupportFileList::AppendIfUnique(const FileSpec &file_spec) { + collection::iterator end = m_files.end(); + if (find_if(m_files.begin(), end, + [&](const std::unique_ptr<SupportFile> &support_file) { + return support_file->GetSpecOnly() == file_spec; + }) == end) { + Append(file_spec); + return true; + } + return false; +} + // Clears the file list. void FileSpecList::Clear() { m_files.clear(); } @@ -55,22 +68,22 @@ void FileSpecList::Dump(Stream *s, const char *separator_cstr) const { // // Returns the valid index of the file that matches "file_spec" if it is found, // else std::numeric_limits<uint32_t>::max() is returned. -size_t FileSpecList::FindFileIndex(size_t start_idx, const FileSpec &file_spec, - bool full) const { - const size_t num_files = m_files.size(); - +static size_t FindFileIndex(size_t start_idx, const FileSpec &file_spec, + bool full, size_t num_files, + std::function<const FileSpec &(size_t)> get_ith) { // When looking for files, we will compare only the filename if the FILE_SPEC // argument is empty bool compare_filename_only = file_spec.GetDirectory().IsEmpty(); for (size_t idx = start_idx; idx < num_files; ++idx) { + const FileSpec &ith = get_ith(idx); if (compare_filename_only) { - if (ConstString::Equals( - m_files[idx].GetFilename(), file_spec.GetFilename(), - file_spec.IsCaseSensitive() || m_files[idx].IsCaseSensitive())) + if (ConstString::Equals(ith.GetFilename(), file_spec.GetFilename(), + file_spec.IsCaseSensitive() || + ith.IsCaseSensitive())) return idx; } else { - if (FileSpec::Equal(m_files[idx], file_spec, full)) + if (FileSpec::Equal(ith, file_spec, full)) return idx; } } @@ -79,8 +92,24 @@ size_t FileSpecList::FindFileIndex(size_t start_idx, const FileSpec &file_spec, return UINT32_MAX; } -size_t FileSpecList::FindCompatibleIndex(size_t start_idx, - const FileSpec &file_spec) const { +size_t FileSpecList::FindFileIndex(size_t start_idx, const FileSpec &file_spec, + bool full) const { + return ::FindFileIndex( + start_idx, file_spec, full, m_files.size(), + [&](size_t idx) -> const FileSpec & { return m_files[idx]; }); +} + +size_t SupportFileList::FindFileIndex(size_t start_idx, + const FileSpec &file_spec, + bool full) const { + return ::FindFileIndex(start_idx, file_spec, full, m_files.size(), + [&](size_t idx) -> const FileSpec & { + return m_files[idx]->GetSpecOnly(); + }); +} + +size_t SupportFileList::FindCompatibleIndex(size_t start_idx, + const FileSpec &file_spec) const { const size_t num_files = m_files.size(); if (start_idx >= num_files) return UINT32_MAX; @@ -92,7 +121,7 @@ size_t FileSpecList::FindCompatibleIndex(size_t start_idx, const bool full = !file_spec.GetDirectory().IsEmpty(); for (size_t idx = start_idx; idx < num_files; ++idx) { - const FileSpec &curr_file = m_files[idx]; + const FileSpec &curr_file = m_files[idx]->GetSpecOnly(); // Always start by matching the filename first if (!curr_file.FileEquals(file_spec)) @@ -140,6 +169,13 @@ const FileSpec &FileSpecList::GetFileSpecAtIndex(size_t idx) const { return g_empty_file_spec; } +const FileSpec &SupportFileList::GetFileSpecAtIndex(size_t idx) const { 
+ if (idx < m_files.size()) + return m_files[idx]->Materialize(); + static FileSpec g_empty_file_spec; + return g_empty_file_spec; +} + // Return the size in bytes that this object takes in memory. This returns the // size in bytes of this object's member variables and any FileSpec objects its // member variables contain, the result doesn't not include the string values diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index 4a952ccae7a0..8afe455e2f08 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -411,6 +411,7 @@ class AliasSetsPrinterPass : public PassInfoMixin<AliasSetsPrinterPass> { public: explicit AliasSetsPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h index 12dd9b04c932..96ae32da6743 100644 --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -189,6 +189,8 @@ public: explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// An immutable pass that tracks lazily created \c AssumptionCache diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/llvm/include/llvm/Analysis/BlockFrequencyInfo.h index 95d75b0e1854..179fd06addec 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfo.h @@ -134,6 +134,8 @@ public: explicit BlockFrequencyPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes \c BlockFrequencyInfo. diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index fb02997371bf..6b9d17818201 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -436,6 +436,8 @@ public: explicit BranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes \c BranchProbabilityInfo. diff --git a/llvm/include/llvm/Analysis/CFGSCCPrinter.h b/llvm/include/llvm/Analysis/CFGSCCPrinter.h index d98071461f57..0ea0b46c4626 100644 --- a/llvm/include/llvm/Analysis/CFGSCCPrinter.h +++ b/llvm/include/llvm/Analysis/CFGSCCPrinter.h @@ -19,6 +19,7 @@ class CFGSCCPrinterPass : public PassInfoMixin<CFGSCCPrinterPass> { public: explicit CFGSCCPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/CallGraph.h b/llvm/include/llvm/Analysis/CallGraph.h index 9413b39978e3..887743774175 100644 --- a/llvm/include/llvm/Analysis/CallGraph.h +++ b/llvm/include/llvm/Analysis/CallGraph.h @@ -322,6 +322,8 @@ public: explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Printer pass for the summarized \c CallGraphAnalysis results. 
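The recurring one-line addition in these analysis headers is the new-pass-manager isRequired() hook; returning true marks a pass as non-skippable, which matters for printer passes whose whole purpose is the side effect of emitting output. A minimal sketch of the pattern, using a hypothetical pass name:

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical printer pass following the same shape as the passes above.
class MyInfoPrinterPass : public PassInfoMixin<MyInfoPrinterPass> {
  raw_ostream &OS;

public:
  explicit MyInfoPrinterPass(raw_ostream &OS) : OS(OS) {}

  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    OS << "MyInfo for " << F.getName() << "\n";
    return PreservedAnalyses::all();
  }

  // Required passes are never skipped (for example for optnone functions, or
  // by instrumentation that bypasses optional passes), so the printout is
  // always produced when the pass is scheduled.
  static bool isRequired() { return true; }
};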
@@ -333,6 +335,8 @@ public: explicit CallGraphSCCsPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// The \c ModulePass which wraps up a \c CallGraph and the logic to diff --git a/llvm/include/llvm/Analysis/CallPrinter.h b/llvm/include/llvm/Analysis/CallPrinter.h index d325d0010371..95cb5cc3ca86 100644 --- a/llvm/include/llvm/Analysis/CallPrinter.h +++ b/llvm/include/llvm/Analysis/CallPrinter.h @@ -24,12 +24,14 @@ class ModulePass; class CallGraphDOTPrinterPass : public PassInfoMixin<CallGraphDOTPrinterPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Pass for viewing the call graph class CallGraphViewerPass : public PassInfoMixin<CallGraphViewerPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; ModulePass *createCallGraphViewerPass(); diff --git a/llvm/include/llvm/Analysis/CostModel.h b/llvm/include/llvm/Analysis/CostModel.h index 649168050cec..9b127c27ba7e 100644 --- a/llvm/include/llvm/Analysis/CostModel.h +++ b/llvm/include/llvm/Analysis/CostModel.h @@ -20,6 +20,8 @@ public: explicit CostModelPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/CycleAnalysis.h b/llvm/include/llvm/Analysis/CycleAnalysis.h index 099d7611dedc..ce939eff8ff8 100644 --- a/llvm/include/llvm/Analysis/CycleAnalysis.h +++ b/llvm/include/llvm/Analysis/CycleAnalysis.h @@ -68,6 +68,8 @@ public: explicit CycleInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index bc599cb1f9a1..bd559f3fb69b 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -427,6 +427,7 @@ public: explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + static bool isRequired() { return true; } private: raw_ostream &OS; diff --git a/llvm/include/llvm/Analysis/DDGPrinter.h b/llvm/include/llvm/Analysis/DDGPrinter.h index d93c28280bac..4aa154d173ba 100644 --- a/llvm/include/llvm/Analysis/DDGPrinter.h +++ b/llvm/include/llvm/Analysis/DDGPrinter.h @@ -29,6 +29,7 @@ class DDGDotPrinterPass : public PassInfoMixin<DDGDotPrinterPass> { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + static bool isRequired() { return true; } }; //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h index 95a36b8b79a4..a00adb289604 100644 --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -140,6 +140,7 @@ struct DelinearizationPrinterPass : public PassInfoMixin<DelinearizationPrinterPass> { explicit DelinearizationPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } private: raw_ostream &OS; diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h 
index 6e4bfcf899c9..aac7382528f0 100644 --- a/llvm/include/llvm/Analysis/DemandedBits.h +++ b/llvm/include/llvm/Analysis/DemandedBits.h @@ -120,6 +120,8 @@ public: explicit DemandedBitsPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 327315f831e1..f0a09644e0f4 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -994,6 +994,8 @@ namespace llvm { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + static bool isRequired() { return true; } + private: raw_ostream &OS; bool NormalizeResults; diff --git a/llvm/include/llvm/Analysis/DominanceFrontier.h b/llvm/include/llvm/Analysis/DominanceFrontier.h index db0130e4804b..b65cdc9cdb3c 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -204,6 +204,8 @@ public: explicit DominanceFrontierPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index 3e9eb9374563..f5fbbdcb7143 100644 --- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -157,6 +157,8 @@ public: explicit FunctionPropertiesPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Correctly update FunctionPropertiesInfo post-inlining. A diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index ad137baff5d4..0d19de6edc2a 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -1198,6 +1198,7 @@ class IRSimilarityAnalysisPrinterPass public: explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 2740106bc7db..5f36ee6f68ab 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -341,7 +341,7 @@ public: Result run(Module &M, ModuleAnalysisManager &MAM) { return Result(M, MAM); } }; -/// Printer pass for the FunctionPropertiesAnalysis results. +/// Printer pass for the InlineAdvisorAnalysis results. 
class InlineAdvisorAnalysisPrinterPass : public PassInfoMixin<InlineAdvisorAnalysisPrinterPass> { raw_ostream &OS; @@ -353,6 +353,7 @@ public: PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + static bool isRequired() { return true; } }; std::unique_ptr<InlineAdvisor> diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 3f0bb879e021..3a760e0a85ce 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -343,6 +343,7 @@ struct InlineCostAnnotationPrinterPass public: explicit InlineCostAnnotationPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h index 0aae696a98a9..b44edd370dd1 100644 --- a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h +++ b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h @@ -40,6 +40,8 @@ public: explicit InlineSizeEstimatorAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // namespace llvm #endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index 211a058aa017..68c98b416ce9 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -1288,6 +1288,8 @@ public: explicit LazyCallGraphPrinterPass(raw_ostream &OS); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// A pass which prints the call graph as a DOT file to a \c raw_ostream. @@ -1301,6 +1303,8 @@ public: explicit LazyCallGraphDOTPrinterPass(raw_ostream &OS); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index 25a2c9ffa534..5611a2b98020 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -157,6 +157,8 @@ public: explicit LazyValueInfoPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Wrapper around LazyValueInfo. 
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index c9e853b9be8e..4fd2485e39d6 100644 --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -291,6 +291,8 @@ public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 3b106381fbca..52084630560c 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -580,11 +580,13 @@ class LoopPrinterPass : public PassInfoMixin<LoopPrinterPass> { public: explicit LoopPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Verifier pass for the \c LoopAnalysis results. struct LoopVerifierPass : public PassInfoMixin<LoopVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// The legacy pass manager's analysis pass to compute loop information. diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 852a6c438d43..3b33dd505dde 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -217,6 +217,8 @@ public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/MemDerefPrinter.h b/llvm/include/llvm/Analysis/MemDerefPrinter.h index bafdc543eeaf..ba376dadb2a7 100644 --- a/llvm/include/llvm/Analysis/MemDerefPrinter.h +++ b/llvm/include/llvm/Analysis/MemDerefPrinter.h @@ -18,6 +18,7 @@ class MemDerefPrinterPass : public PassInfoMixin<MemDerefPrinterPass> { public: MemDerefPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index 827b5081b2ce..37ce1518f00c 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -187,80 +187,119 @@ Value *lowerObjectSizeCall( const TargetLibraryInfo *TLI, AAResults *AA, bool MustSucceed, SmallVectorImpl<Instruction *> *InsertedInstructions = nullptr); -using SizeOffsetType = std::pair<APInt, APInt>; +/// SizeOffsetType - A base template class for the object size visitors. Used +/// here as a self-documenting way to handle the values rather than using a +/// \p std::pair. 
+template <typename T, class C> struct SizeOffsetType { +public: + T Size; + T Offset; + + SizeOffsetType() = default; + SizeOffsetType(T Size, T Offset) : Size(Size), Offset(Offset) {} + + bool knownSize() const { return C::known(Size); } + bool knownOffset() const { return C::known(Offset); } + bool anyKnown() const { return knownSize() || knownOffset(); } + bool bothKnown() const { return knownSize() && knownOffset(); } + + bool operator==(const SizeOffsetType<T, C> &RHS) const { + return Size == RHS.Size && Offset == RHS.Offset; + } + bool operator!=(const SizeOffsetType<T, C> &RHS) const { + return !(*this == RHS); + } +}; + +/// SizeOffsetAPInt - Used by \p ObjectSizeOffsetVisitor, which works with +/// \p APInts. +struct SizeOffsetAPInt : public SizeOffsetType<APInt, SizeOffsetAPInt> { + SizeOffsetAPInt() = default; + SizeOffsetAPInt(APInt Size, APInt Offset) : SizeOffsetType(Size, Offset) {} + + static bool known(APInt V) { return V.getBitWidth() > 1; } +}; /// Evaluate the size and offset of an object pointed to by a Value* /// statically. Fails if size or offset are not known at compile time. class ObjectSizeOffsetVisitor - : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> { + : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetAPInt> { const DataLayout &DL; const TargetLibraryInfo *TLI; ObjectSizeOpts Options; unsigned IntTyBits; APInt Zero; - SmallDenseMap<Instruction *, SizeOffsetType, 8> SeenInsts; + SmallDenseMap<Instruction *, SizeOffsetAPInt, 8> SeenInsts; unsigned InstructionsVisited; APInt align(APInt Size, MaybeAlign Align); - SizeOffsetType unknown() { - return std::make_pair(APInt(), APInt()); - } + static SizeOffsetAPInt unknown() { return SizeOffsetAPInt(); } public: ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, ObjectSizeOpts Options = {}); - SizeOffsetType compute(Value *V); - - static bool knownSize(const SizeOffsetType &SizeOffset) { - return SizeOffset.first.getBitWidth() > 1; - } - - static bool knownOffset(const SizeOffsetType &SizeOffset) { - return SizeOffset.second.getBitWidth() > 1; - } - - static bool bothKnown(const SizeOffsetType &SizeOffset) { - return knownSize(SizeOffset) && knownOffset(SizeOffset); - } + SizeOffsetAPInt compute(Value *V); // These are "private", except they can't actually be made private. Only // compute() should be used by external users. 
- SizeOffsetType visitAllocaInst(AllocaInst &I); - SizeOffsetType visitArgument(Argument &A); - SizeOffsetType visitCallBase(CallBase &CB); - SizeOffsetType visitConstantPointerNull(ConstantPointerNull&); - SizeOffsetType visitExtractElementInst(ExtractElementInst &I); - SizeOffsetType visitExtractValueInst(ExtractValueInst &I); - SizeOffsetType visitGlobalAlias(GlobalAlias &GA); - SizeOffsetType visitGlobalVariable(GlobalVariable &GV); - SizeOffsetType visitIntToPtrInst(IntToPtrInst&); - SizeOffsetType visitLoadInst(LoadInst &I); - SizeOffsetType visitPHINode(PHINode&); - SizeOffsetType visitSelectInst(SelectInst &I); - SizeOffsetType visitUndefValue(UndefValue&); - SizeOffsetType visitInstruction(Instruction &I); + SizeOffsetAPInt visitAllocaInst(AllocaInst &I); + SizeOffsetAPInt visitArgument(Argument &A); + SizeOffsetAPInt visitCallBase(CallBase &CB); + SizeOffsetAPInt visitConstantPointerNull(ConstantPointerNull &); + SizeOffsetAPInt visitExtractElementInst(ExtractElementInst &I); + SizeOffsetAPInt visitExtractValueInst(ExtractValueInst &I); + SizeOffsetAPInt visitGlobalAlias(GlobalAlias &GA); + SizeOffsetAPInt visitGlobalVariable(GlobalVariable &GV); + SizeOffsetAPInt visitIntToPtrInst(IntToPtrInst &); + SizeOffsetAPInt visitLoadInst(LoadInst &I); + SizeOffsetAPInt visitPHINode(PHINode &); + SizeOffsetAPInt visitSelectInst(SelectInst &I); + SizeOffsetAPInt visitUndefValue(UndefValue &); + SizeOffsetAPInt visitInstruction(Instruction &I); private: - SizeOffsetType findLoadSizeOffset( + SizeOffsetAPInt findLoadSizeOffset( LoadInst &LoadFrom, BasicBlock &BB, BasicBlock::iterator From, - SmallDenseMap<BasicBlock *, SizeOffsetType, 8> &VisitedBlocks, + SmallDenseMap<BasicBlock *, SizeOffsetAPInt, 8> &VisitedBlocks, unsigned &ScannedInstCount); - SizeOffsetType combineSizeOffset(SizeOffsetType LHS, SizeOffsetType RHS); - SizeOffsetType computeImpl(Value *V); - SizeOffsetType computeValue(Value *V); + SizeOffsetAPInt combineSizeOffset(SizeOffsetAPInt LHS, SizeOffsetAPInt RHS); + SizeOffsetAPInt computeImpl(Value *V); + SizeOffsetAPInt computeValue(Value *V); bool CheckedZextOrTrunc(APInt &I); }; -using SizeOffsetEvalType = std::pair<Value *, Value *>; +/// SizeOffsetValue - Used by \p ObjectSizeOffsetEvaluator, which works with +/// \p Values. +struct SizeOffsetWeakTrackingVH; +struct SizeOffsetValue : public SizeOffsetType<Value *, SizeOffsetValue> { + SizeOffsetValue() : SizeOffsetType(nullptr, nullptr) {} + SizeOffsetValue(Value *Size, Value *Offset) : SizeOffsetType(Size, Offset) {} + SizeOffsetValue(const SizeOffsetWeakTrackingVH &SOT); + + static bool known(Value *V) { return V != nullptr; } +}; + +/// SizeOffsetWeakTrackingVH - Used by \p ObjectSizeOffsetEvaluator in a +/// \p DenseMap. +struct SizeOffsetWeakTrackingVH + : public SizeOffsetType<WeakTrackingVH, SizeOffsetWeakTrackingVH> { + SizeOffsetWeakTrackingVH() : SizeOffsetType(nullptr, nullptr) {} + SizeOffsetWeakTrackingVH(Value *Size, Value *Offset) + : SizeOffsetType(Size, Offset) {} + SizeOffsetWeakTrackingVH(const SizeOffsetValue &SOV) + : SizeOffsetType(SOV.Size, SOV.Offset) {} + + static bool known(WeakTrackingVH V) { return V.pointsToAliveValue(); } +}; /// Evaluate the size and offset of an object pointed to by a Value*. /// May create code to compute the result at run-time. 
class ObjectSizeOffsetEvaluator - : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> { + : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetValue> { using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>; - using WeakEvalType = std::pair<WeakTrackingVH, WeakTrackingVH>; + using WeakEvalType = SizeOffsetWeakTrackingVH; using CacheMapTy = DenseMap<const Value *, WeakEvalType>; using PtrSetTy = SmallPtrSet<const Value *, 8>; @@ -275,45 +314,27 @@ class ObjectSizeOffsetEvaluator ObjectSizeOpts EvalOpts; SmallPtrSet<Instruction *, 8> InsertedInstructions; - SizeOffsetEvalType compute_(Value *V); + SizeOffsetValue compute_(Value *V); public: - static SizeOffsetEvalType unknown() { - return std::make_pair(nullptr, nullptr); - } - ObjectSizeOffsetEvaluator(const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, ObjectSizeOpts EvalOpts = {}); - SizeOffsetEvalType compute(Value *V); + static SizeOffsetValue unknown() { return SizeOffsetValue(); } - bool knownSize(SizeOffsetEvalType SizeOffset) { - return SizeOffset.first; - } - - bool knownOffset(SizeOffsetEvalType SizeOffset) { - return SizeOffset.second; - } - - bool anyKnown(SizeOffsetEvalType SizeOffset) { - return knownSize(SizeOffset) || knownOffset(SizeOffset); - } - - bool bothKnown(SizeOffsetEvalType SizeOffset) { - return knownSize(SizeOffset) && knownOffset(SizeOffset); - } + SizeOffsetValue compute(Value *V); // The individual instruction visitors should be treated as private. - SizeOffsetEvalType visitAllocaInst(AllocaInst &I); - SizeOffsetEvalType visitCallBase(CallBase &CB); - SizeOffsetEvalType visitExtractElementInst(ExtractElementInst &I); - SizeOffsetEvalType visitExtractValueInst(ExtractValueInst &I); - SizeOffsetEvalType visitGEPOperator(GEPOperator &GEP); - SizeOffsetEvalType visitIntToPtrInst(IntToPtrInst&); - SizeOffsetEvalType visitLoadInst(LoadInst &I); - SizeOffsetEvalType visitPHINode(PHINode &PHI); - SizeOffsetEvalType visitSelectInst(SelectInst &I); - SizeOffsetEvalType visitInstruction(Instruction &I); + SizeOffsetValue visitAllocaInst(AllocaInst &I); + SizeOffsetValue visitCallBase(CallBase &CB); + SizeOffsetValue visitExtractElementInst(ExtractElementInst &I); + SizeOffsetValue visitExtractValueInst(ExtractValueInst &I); + SizeOffsetValue visitGEPOperator(GEPOperator &GEP); + SizeOffsetValue visitIntToPtrInst(IntToPtrInst &); + SizeOffsetValue visitLoadInst(LoadInst &I); + SizeOffsetValue visitPHINode(PHINode &PHI); + SizeOffsetValue visitSelectInst(SelectInst &I); + SizeOffsetValue visitInstruction(Instruction &I); }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 94d7f1a78b84..caf0e31fd37d 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -953,6 +953,8 @@ public: : OS(OS), EnsureOptimizedUses(EnsureOptimizedUses) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Printer pass for \c MemorySSA via the walker. @@ -964,11 +966,14 @@ public: explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Verifier pass for \c MemorySSA. 
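Several of the printer and verifier passes touched in this range (LoopCacheAnalysis, LoopInfo, LoopNestAnalysis, MemDerefPrinter, MemorySSA above, and more below) gain static bool isRequired(). In the new pass manager a required pass is never skipped by pass instrumentation, for example for optnone functions or under -opt-bisect-limit, which is the behaviour you want for passes whose only job is to print or verify. A minimal sketch of the pattern, using a made-up pass name:

  #include "llvm/IR/Function.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  // Hypothetical printer pass following the same shape as the hunks above.
  class MyFunctionPrinterPass : public PassInfoMixin<MyFunctionPrinterPass> {
    raw_ostream &OS;

  public:
    explicit MyFunctionPrinterPass(raw_ostream &OS) : OS(OS) {}

    PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
      OS << "visited " << F.getName() << "\n";
      return PreservedAnalyses::all();
    }

    // Required passes are not skipped by standard instrumentation, so the
    // output (or verification) always happens when the pass is scheduled.
    static bool isRequired() { return true; }
  };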
struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes \c MemorySSA. diff --git a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h index fa91e4f653d0..e69db780a206 100644 --- a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h +++ b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h @@ -23,6 +23,7 @@ class ModuleDebugInfoPrinterPass public: explicit ModuleDebugInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index 9c97bd1725ac..468d94e7cd68 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -547,6 +547,7 @@ class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> { public: MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; class MustBeExecutedContextPrinterPass @@ -556,6 +557,7 @@ class MustBeExecutedContextPrinterPass public: MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/PhiValues.h b/llvm/include/llvm/Analysis/PhiValues.h index ecbb8874b378..a749af30be9e 100644 --- a/llvm/include/llvm/Analysis/PhiValues.h +++ b/llvm/include/llvm/Analysis/PhiValues.h @@ -132,6 +132,7 @@ class PhiValuesPrinterPass : public PassInfoMixin<PhiValuesPrinterPass> { public: explicit PhiValuesPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Wrapper pass for the legacy pass manager diff --git a/llvm/include/llvm/Analysis/PostDominators.h b/llvm/include/llvm/Analysis/PostDominators.h index 4383113c8db1..92e30f82501c 100644 --- a/llvm/include/llvm/Analysis/PostDominators.h +++ b/llvm/include/llvm/Analysis/PostDominators.h @@ -68,6 +68,8 @@ public: explicit PostDominatorTreePrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; struct PostDominatorTreeWrapperPass : public FunctionPass { diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index e49538bfaf80..73be9e1d74a3 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -389,6 +389,7 @@ class ProfileSummaryPrinterPass public: explicit ProfileSummaryPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/RegionInfo.h b/llvm/include/llvm/Analysis/RegionInfo.h index 612b977f1ffa..fc8df36ec287 100644 --- a/llvm/include/llvm/Analysis/RegionInfo.h +++ b/llvm/include/llvm/Analysis/RegionInfo.h @@ -983,11 +983,14 @@ public: explicit RegionInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Verifier pass for the 
\c RegionInfo. struct RegionInfoVerifierPass : PassInfoMixin<RegionInfoVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; template <> diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 4f1237c4b1f9..af3ad822e0b0 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -2246,6 +2246,7 @@ class ScalarEvolutionVerifierPass : public PassInfoMixin<ScalarEvolutionVerifierPass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Printer pass for the \c ScalarEvolutionAnalysis results. @@ -2257,6 +2258,8 @@ public: explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; class ScalarEvolutionWrapperPass : public FunctionPass { diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h index 7fd88362276a..438407fb7056 100644 --- a/llvm/include/llvm/Analysis/StackLifetime.h +++ b/llvm/include/llvm/Analysis/StackLifetime.h @@ -190,6 +190,7 @@ public: StackLifetimePrinterPass(raw_ostream &OS, StackLifetime::LivenessType Type) : Type(Type), OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } void printPipeline(raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName); }; diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index 751735f3e59f..2966f0c7e161 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -105,6 +105,7 @@ class StackSafetyPrinterPass : public PassInfoMixin<StackSafetyPrinterPass> { public: explicit StackSafetyPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// StackSafetyInfo wrapper for the legacy pass manager @@ -143,6 +144,7 @@ class StackSafetyGlobalPrinterPass public: explicit StackSafetyGlobalPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; /// This pass performs the global (interprocedural) stack safety analysis diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h index 0eef17d637c8..9f33c69aed34 100644 --- a/llvm/include/llvm/Analysis/StructuralHash.h +++ b/llvm/include/llvm/Analysis/StructuralHash.h @@ -24,6 +24,8 @@ public: : OS(OS), EnableDetailedStructuralHash(Detailed) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 048912beaba5..9697278eaeae 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1174,6 +1174,9 @@ public: /// \return The associativity of the cache level, if available. std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const; + /// \return The minimum architectural page size for the target. 
+ std::optional<unsigned> getMinPageSize() const; + /// \return How much before a load we should place the prefetch /// instruction. This is currently measured in number of /// instructions. @@ -1923,6 +1926,7 @@ public: virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0; virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0; + virtual std::optional<unsigned> getMinPageSize() const = 0; /// \return How much before a load we should place the prefetch /// instruction. This is currently measured in number of @@ -2520,6 +2524,10 @@ public: return Impl.getCacheAssociativity(Level); } + std::optional<unsigned> getMinPageSize() const override { + return Impl.getMinPageSize(); + } + /// Return the preferred prefetch distance in terms of instructions. /// unsigned getPrefetchDistance() const override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 7ad3ce512a35..60eab53fa2f6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -501,6 +501,8 @@ public: llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } + std::optional<unsigned> getMinPageSize() const { return {}; } + unsigned getPrefetchDistance() const { return 0; } unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, @@ -1048,7 +1050,7 @@ public: if (TargetType->isScalableTy()) return TTI::TCC_Basic; int64_t ElementSize = - DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue(); + GTI.getSequentialElementStride(DL).getFixedValue(); if (ConstIdx) { BaseOffset += ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; diff --git a/llvm/include/llvm/Analysis/UniformityAnalysis.h b/llvm/include/llvm/Analysis/UniformityAnalysis.h index f42c4950ed64..c38d100d88b8 100644 --- a/llvm/include/llvm/Analysis/UniformityAnalysis.h +++ b/llvm/include/llvm/Analysis/UniformityAnalysis.h @@ -47,6 +47,8 @@ public: explicit UniformityInfoPrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Legacy analysis pass which computes a \ref CycleInfo. diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index baa16306ebf5..7360edfce1f3 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -810,9 +810,14 @@ bool isAssumeLikeIntrinsic(const Instruction *I); /// Return true if it is valid to use the assumptions provided by an /// assume intrinsic, I, at the point in the control-flow identified by the -/// context instruction, CxtI. +/// context instruction, CxtI. By default, ephemeral values of the assumption +/// are treated as an invalid context, to prevent the assumption from being used +/// to optimize away its argument. If the caller can ensure that this won't +/// happen, it can call with AllowEphemerals set to true to get more valid +/// assumptions. bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + bool AllowEphemerals = false); enum class OverflowResult { /// Always overflows in the direction of signed/unsigned min value. 
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 4bffcdee6f9c..ee9207bb4f7d 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -506,12 +506,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8") + TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8") + +TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8") + TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v") @@ -560,12 +566,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v" TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4") + TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4") + +TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4") + TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v") @@ -637,6 +649,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8") +TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4") + TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv") @@ -647,6 +662,12 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") +TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") + +TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") +TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") + TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", 
SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -834,6 +855,11 @@ TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZG TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8") +TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4") +TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8") +TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4") + TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv") @@ -859,6 +885,16 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK, "_ZGV_LL TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv") +TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") +TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") +TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") +TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") + +TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") +TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") +TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") +TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") + TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv") diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 0f968eac36e7..9b8128a9ec40 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -356,6 +356,7 @@ enum { ELFOSABI_AROS = 15, // AROS ELFOSABI_FENIXOS = 16, // FenixOS ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI + ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture. ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def index df3a342151fb..4859057abcbb 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def @@ -126,3 +126,26 @@ ELF_RELOC(R_LARCH_64_PCREL, 109) // // Spec addition: https://github.com/loongson/la-abi-specs/pull/4 ELF_RELOC(R_LARCH_CALL36, 110) + +// Relocs added in ELF for the LoongArch™ Architecture v20231219, part of the +// v2.30 LoongArch ABI specs.
+// +// Spec addition: https://github.com/loongson/la-abi-specs/pull/5 +ELF_RELOC(R_LARCH_TLS_DESC32, 13) +ELF_RELOC(R_LARCH_TLS_DESC64, 14) +ELF_RELOC(R_LARCH_TLS_DESC_PC_HI20, 111) +ELF_RELOC(R_LARCH_TLS_DESC_PC_LO12, 112) +ELF_RELOC(R_LARCH_TLS_DESC64_PC_LO20, 113) +ELF_RELOC(R_LARCH_TLS_DESC64_PC_HI12, 114) +ELF_RELOC(R_LARCH_TLS_DESC_HI20, 115) +ELF_RELOC(R_LARCH_TLS_DESC_LO12, 116) +ELF_RELOC(R_LARCH_TLS_DESC64_LO20, 117) +ELF_RELOC(R_LARCH_TLS_DESC64_HI12, 118) +ELF_RELOC(R_LARCH_TLS_DESC_LD, 119) +ELF_RELOC(R_LARCH_TLS_DESC_CALL, 120) +ELF_RELOC(R_LARCH_TLS_LE_HI20_R, 121) +ELF_RELOC(R_LARCH_TLS_LE_ADD_R, 122) +ELF_RELOC(R_LARCH_TLS_LE_LO12_R, 123) +ELF_RELOC(R_LARCH_TLS_LD_PCREL20_S2, 124) +ELF_RELOC(R_LARCH_TLS_GD_PCREL20_S2, 125) +ELF_RELOC(R_LARCH_TLS_DESC_PCREL20_S2, 126) diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index f7d362b5d70c..c726508cd528 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -239,7 +239,8 @@ public: // Emit the bits with VBR encoding, NumBits-1 bits at a time. while (Val >= Threshold) { - Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); + Emit((Val & ((1U << (NumBits - 1)) - 1)) | (1U << (NumBits - 1)), + NumBits); Val >>= NumBits-1; } @@ -255,7 +256,8 @@ public: // Emit the bits with VBR encoding, NumBits-1 bits at a time. while (Val >= Threshold) { - Emit(((uint32_t)Val & ((1 << (NumBits - 1)) - 1)) | (1 << (NumBits - 1)), + Emit(((uint32_t)Val & ((1U << (NumBits - 1)) - 1)) | + (1U << (NumBits - 1)), NumBits); Val >>= NumBits-1; } diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h index 6eb09f32f9f9..0638fbffda4f 100644 --- a/llvm/include/llvm/CodeGen/AccelTable.h +++ b/llvm/include/llvm/CodeGen/AccelTable.h @@ -143,6 +143,15 @@ public: std::vector<AccelTableData *> Values; MCSymbol *Sym; + /// Get all AccelTableData cast as a `T`. + template <typename T = AccelTableData *> auto getValues() const { + static_assert(std::is_pointer<T>()); + static_assert( + std::is_base_of<AccelTableData, std::remove_pointer_t<T>>()); + return map_range( + Values, [](AccelTableData *Data) { return static_cast<T>(Data); }); + } + #ifndef NDEBUG void print(raw_ostream &OS) const; void dump() const { print(dbgs()); } @@ -319,8 +328,7 @@ public: /// Needs to be called after DIE offsets are computed. void convertDieToOffset() { for (auto &Entry : Entries) { - for (AccelTableData *Value : Entry.second.Values) { - DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value); + for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) { // For TU we normalize as each Unit is emitted. // So when this is invoked after CU construction we will be in mixed // state. 
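The BitstreamWriter change above switches the VBR emission masks from 1 << (NumBits - 1) to 1U << (NumBits - 1). With a signed literal, the shift and the subsequent subtraction can overflow into the sign bit once NumBits reaches the width of int, which is undefined or implementation-defined arithmetic; the unsigned form is well defined for the full range the encoder supports. A small illustrative helper, assuming 32-bit chunks as in the header:

  #include <cstdint>

  // Illustration of the masks the hunk computes: the low (NumBits-1) payload
  // bits plus the "more chunks follow" high bit, using unsigned arithmetic so
  // the shift stays well defined for NumBits up to 32.
  uint32_t vbrChunk(unsigned NumBits, uint32_t Val) {
    uint32_t HighBit = 1U << (NumBits - 1); // continuation marker
    uint32_t LowMask = HighBit - 1;         // e.g. NumBits == 8 -> 0x7f
    return (Val & LowMask) | HighBit;       // payload bits plus "more" bit
  }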
@@ -332,8 +340,7 @@ public: void addTypeEntries(DWARF5AccelTable &Table) { for (auto &Entry : Table.getEntries()) { - for (AccelTableData *Value : Entry.second.Values) { - DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value); + for (auto *Data : Entry.second.getValues<DWARF5AccelTableData *>()) { addName(Entry.second.Name, Data->getDieOffset(), Data->getDieTag(), Data->getUnitID(), true); } diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index dfb8d5d9f2f5..bba675f1d3eb 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -21,11 +21,14 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Error.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetMachine.h" + using namespace llvm; namespace llvm { @@ -72,25 +75,13 @@ template <> struct DenseMapInfo<UniqueBBID> { } }; -class BasicBlockSectionsProfileReader : public ImmutablePass { +class BasicBlockSectionsProfileReader { public: - static char ID; - + friend class BasicBlockSectionsProfileReaderWrapperPass; BasicBlockSectionsProfileReader(const MemoryBuffer *Buf) - : ImmutablePass(ID), MBuf(Buf), - LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') { - initializeBasicBlockSectionsProfileReaderPass( - *PassRegistry::getPassRegistry()); - }; + : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'){}; - BasicBlockSectionsProfileReader() : ImmutablePass(ID) { - initializeBasicBlockSectionsProfileReaderPass( - *PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "Basic Block Sections Profile Reader"; - } + BasicBlockSectionsProfileReader(){}; // Returns true if basic block sections profile exist for function \p // FuncName. @@ -109,10 +100,6 @@ public: SmallVector<SmallVector<unsigned>> getClonePathsForFunction(StringRef FuncName) const; - // Initializes the FunctionNameToDIFilename map for the current module and - // then reads the profile for the matching functions. - bool doInitialization(Module &M) override; - private: StringRef getAliasName(StringRef FuncName) const { auto R = FuncAliasMap.find(FuncName); @@ -170,7 +157,61 @@ private: // sections profile. \p Buf is a memory buffer that contains the list of // functions and basic block ids to selectively enable basic block sections. ImmutablePass * -createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf); +createBasicBlockSectionsProfileReaderWrapperPass(const MemoryBuffer *Buf); + +/// Analysis pass providing the \c BasicBlockSectionsProfileReader. +/// +/// Note that this pass's result cannot be invalidated, it is immutable for the +/// life of the module. 
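The AccelTable hunks above replace the repeated loop-plus-static_cast idiom with a templated getValues<T>() built on map_range, so callers like convertDieToOffset and addTypeEntries iterate directly over the derived pointer type. A standalone sketch of the same pattern, using stand-in Base/Derived types rather than the real AccelTableData hierarchy:

  #include "llvm/ADT/STLExtras.h"
  #include <type_traits>
  #include <vector>

  struct Base { virtual ~Base() = default; };
  struct Derived : Base { int DieOffset = 0; };

  // Same shape as AccelTable's new getValues<T>(): a statically checked cast
  // of a container of base pointers into a range of derived pointers.
  template <typename T = Base *>
  auto getValuesSketch(const std::vector<Base *> &Values) {
    static_assert(std::is_pointer<T>());
    static_assert(std::is_base_of<Base, std::remove_pointer_t<T>>());
    return llvm::map_range(Values, [](Base *V) { return static_cast<T>(V); });
  }

  int sumOffsets(const std::vector<Base *> &Values) {
    int Sum = 0;
    for (auto *D : getValuesSketch<Derived *>(Values))
      Sum += D->DieOffset; // callers no longer write the static_cast themselves
    return Sum;
  }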
+class BasicBlockSectionsProfileReaderAnalysis + : public AnalysisInfoMixin<BasicBlockSectionsProfileReaderAnalysis> { + +public: + static AnalysisKey Key; + typedef BasicBlockSectionsProfileReader Result; + BasicBlockSectionsProfileReaderAnalysis(const TargetMachine *TM) : TM(TM) {} + + Result run(Function &F, FunctionAnalysisManager &AM); + +private: + const TargetMachine *TM; +}; + +class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass { +public: + static char ID; + BasicBlockSectionsProfileReader BBSPR; + + BasicBlockSectionsProfileReaderWrapperPass(const MemoryBuffer *Buf) + : ImmutablePass(ID), BBSPR(BasicBlockSectionsProfileReader(Buf)) { + initializeBasicBlockSectionsProfileReaderWrapperPassPass( + *PassRegistry::getPassRegistry()); + }; + + BasicBlockSectionsProfileReaderWrapperPass() + : ImmutablePass(ID), BBSPR(BasicBlockSectionsProfileReader()) { + initializeBasicBlockSectionsProfileReaderWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Basic Block Sections Profile Reader"; + } + + bool isFunctionHot(StringRef FuncName) const; + + std::pair<bool, SmallVector<BBClusterInfo>> + getClusterInfoForFunction(StringRef FuncName) const; + + SmallVector<SmallVector<unsigned>> + getClonePathsForFunction(StringRef FuncName) const; + + // Initializes the FunctionNameToDIFilename map for the current module and + // then reads the profile for the matching functions. + bool doInitialization(Module &M) override; + + BasicBlockSectionsProfileReader &getBBSPR(); +}; } // namespace llvm #endif // LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index 06da9fb57902..fa81ff504ac6 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -24,6 +24,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/CallBrPrepare.h" +#include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/DwarfEHPrepare.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandReductions.h" @@ -38,7 +39,9 @@ #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" #include "llvm/CodeGen/SelectOptimize.h" +#include "llvm/CodeGen/ShadowStackGCLowering.h" #include "llvm/CodeGen/SjLjEHPrepare.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/CodeGen/WasmEHPrepare.h" #include "llvm/CodeGen/WinEHPrepare.h" @@ -229,25 +232,27 @@ protected: C(&PassT::Key); } - template <typename PassT> void insertPass(AnalysisKey *ID, PassT Pass) { + template <typename PassT> void insertPass(MachinePassKey *ID, PassT Pass) { AfterCallbacks.emplace_back( - [this, ID, Pass = std::move(Pass)](AnalysisKey *PassID) { + [this, ID, Pass = std::move(Pass)](MachinePassKey *PassID) { if (PassID == ID) this->PM.addPass(std::move(Pass)); }); } - void disablePass(AnalysisKey *ID) { + void disablePass(MachinePassKey *ID) { BeforeCallbacks.emplace_back( - [ID](AnalysisKey *PassID) { return PassID != ID; }); + [ID](MachinePassKey *PassID) { return PassID != ID; }); } MachineFunctionPassManager releasePM() { return std::move(PM); } private: MachineFunctionPassManager &PM; - SmallVector<llvm::unique_function<bool(AnalysisKey *)>, 4> BeforeCallbacks; - SmallVector<llvm::unique_function<void(AnalysisKey *)>, 4> AfterCallbacks; + 
SmallVector<llvm::unique_function<bool(MachinePassKey *)>, 4> + BeforeCallbacks; + SmallVector<llvm::unique_function<void(MachinePassKey *)>, 4> + AfterCallbacks; }; LLVMTargetMachine &TM; @@ -485,6 +490,7 @@ Error CodeGenPassBuilder<Derived>::buildPipeline( AddIRPass addIRPass(MPM, Opt.DebugPM); // `ProfileSummaryInfo` is always valid. addIRPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); + addIRPass(RequireAnalysisPass<CollectorMetadataAnalysis, Module>()); addISelPasses(addIRPass); AddMachinePass addPass(MFPM); @@ -640,6 +646,9 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const { // Run GC lowering passes for builtin collectors // TODO: add a pass insertion point here addPass(GCLoweringPass()); + // FIXME: `ShadowStackGCLoweringPass` now is a + // module pass, so it will trigger assertion. + // See comment of `AddingFunctionPasses` addPass(ShadowStackGCLoweringPass()); addPass(LowerConstantIntrinsicsPass()); @@ -727,7 +736,7 @@ void CodeGenPassBuilder<Derived>::addPassesToHandleExceptions( template <typename Derived> void CodeGenPassBuilder<Derived>::addCodeGenPrepare(AddIRPass &addPass) const { if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableCGP) - addPass(CodeGenPreparePass()); + addPass(CodeGenPreparePass(&TM)); // TODO: Default ctor'd RewriteSymbolPass is no-op. // addPass(RewriteSymbolPass()); } @@ -742,7 +751,7 @@ void CodeGenPassBuilder<Derived>::addISelPrepare(AddIRPass &addPass) const { // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. addPass(SafeStackPass(&TM)); - addPass(StackProtectorPass()); + addPass(StackProtectorPass(&TM)); if (Opt.PrintISelInput) addPass(PrintFunctionPass(dbgs(), diff --git a/llvm/include/llvm/CodeGen/CodeGenPrepare.h b/llvm/include/llvm/CodeGen/CodeGenPrepare.h new file mode 100644 index 000000000000..dee3a9ee53d7 --- /dev/null +++ b/llvm/include/llvm/CodeGen/CodeGenPrepare.h @@ -0,0 +1,35 @@ +//===- CodeGenPrepare.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Defines an IR pass for CodeGen Prepare. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PREPARE_H +#define LLVM_CODEGEN_PREPARE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; +class TargetMachine; + +class CodeGenPreparePass : public PassInfoMixin<CodeGenPreparePass> { +private: + const TargetMachine *TM; + +public: + CodeGenPreparePass(const TargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_PREPARE_H diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index dc2931b40d35..0f17e51f0b7a 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -319,6 +319,10 @@ public: /// Reset InsertPt to the given old insert position. void leaveLocalValueArea(SavePoint Old); + /// Target-independent lowering of non-instruction debug info associated with + /// this instruction. 
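With the new CodeGenPrepare.h above, CodeGenPrepare becomes a new-pass-manager pass that carries the TargetMachine in its constructor, matching how CodeGenPassBuilder now constructs it (and StackProtectorPass) with &TM. A hedged sketch of adding it to a standalone function pipeline, assuming a TargetMachine has already been created elsewhere:

  #include "llvm/CodeGen/CodeGenPrepare.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Target/TargetMachine.h"

  using namespace llvm;

  // Sketch: append the new-PM CodeGenPrepare to a function pipeline. Creating
  // and configuring TM (TargetRegistry lookup, options) is outside this example.
  void addCodeGenPrepare(FunctionPassManager &FPM, const TargetMachine *TM) {
    FPM.addPass(CodeGenPreparePass(TM));
  }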
+ void handleDbgInfo(const Instruction *II); + protected: explicit FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, @@ -518,6 +522,16 @@ protected: return MF->getFunction().hasOptSize(); } + /// Target-independent lowering of debug information. Returns false if the + /// debug information couldn't be lowered and was instead discarded. + virtual bool lowerDbgValue(const Value *V, DIExpression *Expr, + DILocalVariable *Var, const DebugLoc &DL); + + /// Target-independent lowering of debug information. Returns false if the + /// debug information couldn't be lowered and was instead discarded. + virtual bool lowerDbgDeclare(const Value *V, DIExpression *Expr, + DILocalVariable *Var, const DebugLoc &DL); + private: /// Handle PHI nodes in successor blocks. /// diff --git a/llvm/include/llvm/CodeGen/GCMetadata.h b/llvm/include/llvm/CodeGen/GCMetadata.h index 9e4e8342ea29..ca6a511185c7 100644 --- a/llvm/include/llvm/CodeGen/GCMetadata.h +++ b/llvm/include/llvm/CodeGen/GCMetadata.h @@ -186,6 +186,17 @@ public: Result run(Function &F, FunctionAnalysisManager &FAM); }; +/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or +/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as +/// directed by the GCStrategy. It also performs automatic root initialization +/// and custom intrinsic lowering. +/// +/// This pass requires `CollectorMetadataAnalysis`. +class GCLoweringPass : public PassInfoMixin<GCLoweringPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + /// An analysis pass which caches information about the entire Module. /// Records both the function level information used by GCRoots and a /// cache of the 'active' gc strategy objects for the current Module. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index bffc03ed0187..1b094d9d9fe7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -366,6 +366,10 @@ private: BranchProbability BranchProbToNext, Register Reg, SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB); + void splitWorkItem(SwitchCG::SwitchWorkList &WorkList, + const SwitchCG::SwitchWorkListItem &W, Value *Cond, + MachineBasicBlock *SwitchMBB, MachineIRBuilder &MIB); + bool lowerJumpTableWorkItem( SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 851353042cc2..da330b517c28 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -1366,6 +1366,9 @@ public: // Adding Use to ArtifactList. 
WrapperObserver.changedInstr(Use); break; + case TargetOpcode::G_ASSERT_SEXT: + case TargetOpcode::G_ASSERT_ZEXT: + case TargetOpcode::G_ASSERT_ALIGN: case TargetOpcode::COPY: { Register Copy = Use.getOperand(0).getReg(); if (Copy.isVirtual()) @@ -1392,6 +1395,9 @@ private: case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: case TargetOpcode::G_EXTRACT: + case TargetOpcode::G_ASSERT_SEXT: + case TargetOpcode::G_ASSERT_ZEXT: + case TargetOpcode::G_ASSERT_ALIGN: return MI.getOperand(1).getReg(); case TargetOpcode::G_UNMERGE_VALUES: return MI.getOperand(MI.getNumOperands() - 1).getReg(); @@ -1425,7 +1431,8 @@ private: if (MRI.hasOneUse(PrevRegSrc)) { if (TmpDef != &DefMI) { assert((TmpDef->getOpcode() == TargetOpcode::COPY || - isArtifactCast(TmpDef->getOpcode())) && + isArtifactCast(TmpDef->getOpcode()) || + isPreISelGenericOptimizationHint(TmpDef->getOpcode())) && "Expecting copy or artifact cast here"); DeadInsts.push_back(TmpDef); @@ -1509,16 +1516,8 @@ private: /// Looks through copy instructions and returns the actual /// source register. Register lookThroughCopyInstrs(Register Reg) { - using namespace llvm::MIPatternMatch; - - Register TmpReg; - while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { - if (MRI.getType(TmpReg).isValid()) - Reg = TmpReg; - else - break; - } - return Reg; + Register TmpReg = getSrcRegIgnoringCopies(Reg, MRI); + return TmpReg.isValid() ? TmpReg : Reg; } }; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 711ba10247c3..586679fa2954 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -288,11 +288,14 @@ private: // Implements floating-point environment read/write via library function call. LegalizeResult createGetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI); + MachineInstr &MI, + LostDebugLocObserver &LocObserver); LegalizeResult createSetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI); + MachineInstr &MI, + LostDebugLocObserver &LocObserver); LegalizeResult createResetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI); + MachineInstr &MI, + LostDebugLocObserver &LocObserver); public: /// Return the alignment to use for a stack temporary object with the given @@ -347,6 +350,9 @@ public: LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); + LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); @@ -440,13 +446,15 @@ public: LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC); + ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC, + LostDebugLocObserver &LocObserver, MachineInstr *MI = nullptr); /// Helper function that creates the given libcall. LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args); + ArrayRef<CallLowering::ArgInfo> Args, + LostDebugLocObserver &LocObserver, MachineInstr *MI = nullptr); /// Create a libcall to memcpy et al. 
LegalizerHelper::LegalizeResult diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index e51a3ec94005..d09100b28f3e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -35,6 +35,7 @@ extern cl::opt<bool> DisableGISelLegalityCheck; class MachineFunction; class raw_ostream; class LegalizerHelper; +class LostDebugLocObserver; class MachineInstr; class MachineRegisterInfo; class MCInstrInfo; @@ -223,6 +224,11 @@ struct TypePairAndMemDesc { } }; +/// True iff P is false. +template <typename Predicate> Predicate predNot(Predicate P) { + return [=](const LegalityQuery &Query) { return !P(Query); }; +} + /// True iff P0 and P1 are true. template<typename Predicate> Predicate all(Predicate P0, Predicate P1) { @@ -1288,8 +1294,8 @@ public: const MachineRegisterInfo &MRI) const; /// Called for instructions with the Custom LegalizationAction. - virtual bool legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { + virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { llvm_unreachable("must implement this if custom action is used"); } diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def index f950dfae7e33..e789747036ef 100644 --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -26,6 +26,7 @@ MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ()) MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass, ()) MODULE_PASS("lower-emutls", LowerEmuTLSPass, ()) +MODULE_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ()) #undef MODULE_PASS #ifndef FUNCTION_ANALYSIS @@ -33,6 +34,7 @@ MODULE_PASS("lower-emutls", LowerEmuTLSPass, ()) #endif FUNCTION_ANALYSIS("gc-function", GCFunctionAnalysis, ()) FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) +FUNCTION_ANALYSIS("ssp-layout", SSPLayoutAnalysis, ()) FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, (std::move(TM.getTargetIRAnalysis()))) #undef FUNCTION_ANALYSIS @@ -42,6 +44,7 @@ FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, #endif FUNCTION_PASS("callbrprepare", CallBrPreparePass, ()) FUNCTION_PASS("cfguard", CFGuardPass, ()) +FUNCTION_PASS("codegenprepare", CodeGenPreparePass, (TM)) FUNCTION_PASS("consthoist", ConstantHoistingPass, ()) FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass, (TM)) FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) @@ -50,6 +53,7 @@ FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, (TM)) FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass, (TM)) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) +FUNCTION_PASS("gc-lowering", GCLoweringPass, ()) FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, (TM)) FUNCTION_PASS("interleaved-access", InterleavedAccessPass, (TM)) FUNCTION_PASS("interleaved-load-combine", InterleavedLoadCombinePass, (TM)) @@ -63,6 +67,7 @@ FUNCTION_PASS("safe-stack", SafeStackPass, (TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ()) FUNCTION_PASS("select-optimize", SelectOptimizePass, (TM)) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass, (TM)) +FUNCTION_PASS("stack-protector", StackProtectorPass, (TM)) 
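Among the LegalizerInfo changes above, a predNot combinator joins the existing all()/any() helpers for composing LegalityPredicate values. A small sketch of how a target might use it; the predicate body is an illustrative lambda, not a real legalization rule:

  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

  using namespace llvm;

  // Stand-in predicate: true when the first type in the query is a vector.
  static LegalityPredicate isVectorType() {
    return [](const LegalityQuery &Query) { return Query.Types[0].isVector(); };
  }

  // predNot inverts it, so this is true exactly for the non-vector case; a
  // target could mark one case legal and send the other to custom lowering.
  static LegalityPredicate isNotVectorType() {
    return predNot(isVectorType());
  }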
FUNCTION_PASS("tlshoist", TLSVariableHoistPass, ()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) FUNCTION_PASS("verify", VerifierPass, ()) @@ -131,10 +136,6 @@ MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, #define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) #endif DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ()) -DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ()) -DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ()) -DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ()) -DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ()) #undef DUMMY_FUNCTION_PASS #ifndef DUMMY_MACHINE_MODULE_PASS diff --git a/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h b/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h index fe07c70d85c5..3dc0731f5a04 100644 --- a/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h +++ b/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h @@ -32,7 +32,7 @@ public: bool PutEmptyString = false) : Translator(Translator) { if (PutEmptyString) - EmptyString = getEntry(""); + getEntry(""); } DwarfStringPoolEntryRef getEntry(StringRef S); @@ -59,7 +59,6 @@ private: MapTy Strings; uint64_t CurrentEndOffset = 0; unsigned NumEntries = 0; - DwarfStringPoolEntryRef EmptyString; std::function<StringRef(StringRef Input)> Translator; }; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index ca9fbb1def76..bbfb8a0dbe26 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -93,9 +93,9 @@ namespace llvm { MachineFunctionPass *createResetMachineFunctionPass(bool EmitFallbackDiag, bool AbortOnFailedISel); - /// createCodeGenPreparePass - Transform the code to expose more pattern + /// createCodeGenPrepareLegacyPass - Transform the code to expose more pattern /// matching during instruction selection. - FunctionPass *createCodeGenPreparePass(); + FunctionPass *createCodeGenPrepareLegacyPass(); /// This pass implements generation of target-specific intrinsics to support /// handling of complex number arithmetic diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h index 666420681510..3a407c4a4d94 100644 --- a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h @@ -82,6 +82,12 @@ namespace RTLIB { /// UNKNOWN_LIBCALL if there is none. Libcall getSYNC(unsigned Opc, MVT VT); + /// Return the outline atomics value for the given atomic ordering, access + /// size and set of libcalls for a given atomic, or UNKNOWN_LIBCALL if there + /// is none. + Libcall getOutlineAtomicHelper(const Libcall (&LC)[5][4], + AtomicOrdering Order, uint64_t MemSize); + /// Return the outline atomics value for the given opcode, atomic ordering /// and type, or UNKNOWN_LIBCALL if there is none. Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 5c44538fe699..ebf410cc94de 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -381,6 +381,7 @@ private: bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; + bool Disjoint : 1; bool NonNeg : 1; bool NoNaNs : 1; bool NoInfs : 1; @@ -402,10 +403,11 @@ private: public: /// Default constructor turns off all optimization flags. 
SDNodeFlags() - : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NonNeg(false), - NoNaNs(false), NoInfs(false), NoSignedZeros(false), - AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), - AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {} + : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), + Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false), + NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), + ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false), + Unpredictable(false) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -422,6 +424,7 @@ public: void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } void setNoSignedWrap(bool b) { NoSignedWrap = b; } void setExact(bool b) { Exact = b; } + void setDisjoint(bool b) { Disjoint = b; } void setNonNeg(bool b) { NonNeg = b; } void setNoNaNs(bool b) { NoNaNs = b; } void setNoInfs(bool b) { NoInfs = b; } @@ -437,6 +440,7 @@ public: bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } bool hasNoSignedWrap() const { return NoSignedWrap; } bool hasExact() const { return Exact; } + bool hasDisjoint() const { return Disjoint; } bool hasNonNeg() const { return NonNeg; } bool hasNoNaNs() const { return NoNaNs; } bool hasNoInfs() const { return NoInfs; } @@ -454,6 +458,7 @@ public: NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; + Disjoint &= Flags.Disjoint; NonNeg &= Flags.NonNeg; NoNaNs &= Flags.NoNaNs; NoInfs &= Flags.NoInfs; @@ -924,6 +929,9 @@ public: /// Helper method returns the integer value of a ConstantSDNode operand. inline uint64_t getConstantOperandVal(unsigned Num) const; + /// Helper method returns the zero-extended integer value of a ConstantSDNode. + inline uint64_t getAsZExtVal() const; + /// Helper method returns the APInt of a ConstantSDNode operand. inline const APInt &getConstantOperandAPInt(unsigned Num) const; @@ -1640,6 +1648,10 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(getOperand(Num))->getZExtValue(); } +uint64_t SDNode::getAsZExtVal() const { + return cast<ConstantSDNode>(this)->getZExtValue(); +} + const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const { return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue(); } diff --git a/llvm/include/llvm/CodeGen/ShadowStackGCLowering.h b/llvm/include/llvm/CodeGen/ShadowStackGCLowering.h new file mode 100644 index 000000000000..1586c6cf545b --- /dev/null +++ b/llvm/include/llvm/CodeGen/ShadowStackGCLowering.h @@ -0,0 +1,24 @@ +//===- llvm/CodeGen/ShadowStackGCLowering.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SHADOWSTACKGCLOWERING_H +#define LLVM_CODEGEN_SHADOWSTACKGCLOWERING_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ShadowStackGCLoweringPass + : public PassInfoMixin<ShadowStackGCLoweringPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_SHADOWSTACKGCLOWERING_H diff --git a/llvm/include/llvm/CodeGen/StackProtector.h b/llvm/include/llvm/CodeGen/StackProtector.h index 57cb7a1c85ae..eb5d9d0caebc 100644 --- a/llvm/include/llvm/CodeGen/StackProtector.h +++ b/llvm/include/llvm/CodeGen/StackProtector.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/TargetParser/Triple.h" @@ -30,25 +31,15 @@ class Module; class TargetLoweringBase; class TargetMachine; -class StackProtector : public FunctionPass { -private: +class SSPLayoutInfo { + friend class StackProtectorPass; + friend class SSPLayoutAnalysis; + friend class StackProtector; static constexpr unsigned DefaultSSPBufferSize = 8; /// A mapping of AllocaInsts to their required SSP layout. - using SSPLayoutMap = DenseMap<const AllocaInst *, - MachineFrameInfo::SSPLayoutKind>; - - const TargetMachine *TM = nullptr; - - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// target type sizes. - const TargetLoweringBase *TLI = nullptr; - Triple Trip; - - Function *F = nullptr; - Module *M = nullptr; - - std::optional<DomTreeUpdater> DTU; + using SSPLayoutMap = + DenseMap<const AllocaInst *, MachineFrameInfo::SSPLayoutKind>; /// Layout - Mapping of allocations to the required SSPLayoutKind. /// StackProtector analysis will update this map when determining if an @@ -59,23 +50,59 @@ private: /// protection when -fstack-protection is used. unsigned SSPBufferSize = DefaultSSPBufferSize; + bool RequireStackProtector = false; + // A prologue is generated. bool HasPrologue = false; // IR checking code is generated. bool HasIRCheck = false; - /// InsertStackProtectors - Insert code into the prologue and epilogue of - /// the function. - /// - /// - The prologue code loads and stores the stack guard onto the stack. - /// - The epilogue checks the value stored in the prologue against the - /// original value. It calls __stack_chk_fail if they differ. - bool InsertStackProtectors(); +public: + // Return true if StackProtector is supposed to be handled by SelectionDAG. + bool shouldEmitSDCheck(const BasicBlock &BB) const; + + void copyToMachineFrameInfo(MachineFrameInfo &MFI) const; +}; + +class SSPLayoutAnalysis : public AnalysisInfoMixin<SSPLayoutAnalysis> { + friend AnalysisInfoMixin<SSPLayoutAnalysis>; + using SSPLayoutMap = SSPLayoutInfo::SSPLayoutMap; + + static AnalysisKey Key; + +public: + using Result = SSPLayoutInfo; + + Result run(Function &F, FunctionAnalysisManager &FAM); - /// CreateFailBB - Create a basic block to jump to when the stack protector - /// check fails. - BasicBlock *CreateFailBB(); + /// Check whether or not \p F needs a stack protector based upon the stack + /// protector level. 
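The StackProtector rewrite above splits the old FunctionPass into a reusable SSPLayoutInfo result and an SSPLayoutAnalysis for the new pass manager (registered as "ssp-layout" in MachinePassRegistry.def earlier), with a thin StackProtectorPass and the legacy pass delegating to them just below. A hedged sketch of how another new-PM pass could consume the analysis result:

  #include "llvm/CodeGen/StackProtector.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/PassManager.h"

  using namespace llvm;

  // Hypothetical consumer pass: asks the analysis manager for the SSP layout
  // and only inspects it, so all analyses are preserved.
  struct SSPLayoutUserPass : PassInfoMixin<SSPLayoutUserPass> {
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
      SSPLayoutInfo &Info = FAM.getResult<SSPLayoutAnalysis>(F);
      for (const BasicBlock &BB : F)
        (void)Info.shouldEmitSDCheck(BB); // per-block protector decision
      return PreservedAnalyses::all();
    }
  };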
+ static bool requiresStackProtector(Function *F, + SSPLayoutMap *Layout = nullptr); +}; + +class StackProtectorPass : public PassInfoMixin<StackProtectorPass> { + const TargetMachine *TM; + +public: + explicit StackProtectorPass(const TargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + +class StackProtector : public FunctionPass { +private: + /// A mapping of AllocaInsts to their required SSP layout. + using SSPLayoutMap = SSPLayoutInfo::SSPLayoutMap; + + const TargetMachine *TM = nullptr; + + Function *F = nullptr; + Module *M = nullptr; + + std::optional<DomTreeUpdater> DTU; + + SSPLayoutInfo LayoutInfo; public: static char ID; // Pass identification, replacement for typeid. @@ -85,16 +112,22 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; // Return true if StackProtector is supposed to be handled by SelectionDAG. - bool shouldEmitSDCheck(const BasicBlock &BB) const; + bool shouldEmitSDCheck(const BasicBlock &BB) const { + return LayoutInfo.shouldEmitSDCheck(BB); + } bool runOnFunction(Function &Fn) override; - void copyToMachineFrameInfo(MachineFrameInfo &MFI) const; + void copyToMachineFrameInfo(MachineFrameInfo &MFI) const { + LayoutInfo.copyToMachineFrameInfo(MFI); + } /// Check whether or not \p F needs a stack protector based upon the stack /// protector level. - static bool requiresStackProtector(Function *F, SSPLayoutMap *Layout = nullptr); - + static bool requiresStackProtector(Function *F, + SSPLayoutMap *Layout = nullptr) { + return SSPLayoutAnalysis::requiresStackProtector(F, Layout); + } }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index 5d06e21737b8..99478e9f39e2 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -293,6 +293,22 @@ public: MachineBasicBlock *Src, MachineBasicBlock *Dst, BranchProbability Prob = BranchProbability::getUnknown()) = 0; + /// Determine the rank by weight of CC in [First,Last]. If CC has more weight + /// than each cluster in the range, its rank is 0. + unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First, + CaseClusterIt Last); + + struct SplitWorkItemInfo { + CaseClusterIt LastLeft; + CaseClusterIt FirstRight; + BranchProbability LeftProb; + BranchProbability RightProb; + }; + /// Compute information to balance the tree based on branch probabilities to + /// create a near-optimal (in terms of search time given key frequency) binary + /// search tree. See e.g. Kurt Mehlhorn "Nearly Optimal Binary Search Trees" + /// (1975). 
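The switch-lowering helpers added here (caseClusterRank and computeSplitWorkItemInfo) hoist the probability-balanced binary-split logic out of SelectionDAG so the new GlobalISel splitWorkItem seen earlier can share it. The underlying idea, as a small self-contained sketch over plain weights rather than the real CaseCluster and BranchProbability types, and not the exact upstream algorithm:

  #include <cstddef>
  #include <vector>

  // Pick the split point that makes the left and right probability sums as
  // equal as possible, so the expected number of comparisons in the resulting
  // binary search tree is near-minimal (cf. Mehlhorn 1975).
  static size_t pickSplit(const std::vector<double> &Weights) {
    double Total = 0;
    for (double W : Weights)
      Total += W;
    double Left = 0, BestDiff = Total;
    size_t Best = 0;
    for (size_t I = 0; I + 1 < Weights.size(); ++I) {
      Left += Weights[I];
      double Right = Total - Left;
      double Diff = Left > Right ? Left - Right : Right - Left;
      if (Diff < BestDiff) {
        BestDiff = Diff;
        Best = I; // index of the last cluster kept on the left
      }
    }
    return Best;
  }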
+ SplitWorkItemInfo computeSplitWorkItemInfo(const SwitchWorkListItem &W); virtual ~SwitchLowering() = default; private: diff --git a/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h b/llvm/include/llvm/DWARFLinker/AddressesMap.h index b451fee4e0b7..d8b3b4407471 100644 --- a/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h +++ b/llvm/include/llvm/DWARFLinker/AddressesMap.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKERPARALLEL_ADDRESSESMAP_H -#define LLVM_DWARFLINKERPARALLEL_ADDRESSESMAP_H +#ifndef LLVM_DWARFLINKER_ADDRESSESMAP_H +#define LLVM_DWARFLINKER_ADDRESSESMAP_H #include "llvm/ADT/AddressRanges.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -17,7 +17,7 @@ #include <cstdint> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { /// Mapped value in the address map is the offset to apply to the /// linked address. @@ -186,7 +186,7 @@ protected: } }; -} // end of namespace dwarflinker_parallel +} // namespace dwarf_linker } // end namespace llvm -#endif // LLVM_DWARFLINKERPARALLEL_ADDRESSESMAP_H +#endif // LLVM_DWARFLINKER_ADDRESSESMAP_H diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h index 2bd85e30d3b1..d3aaa3baadc4 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKER_DWARFLINKER_H -#define LLVM_DWARFLINKER_DWARFLINKER_H +#ifndef LLVM_DWARFLINKER_CLASSIC_DWARFLINKER_H +#define LLVM_DWARFLINKER_CLASSIC_DWARFLINKER_H #include "llvm/ADT/AddressRanges.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" -#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +#include "llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h" +#include "llvm/DWARFLinker/DWARFLinkerBase.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" @@ -25,73 +26,11 @@ namespace llvm { class DWARFExpression; class DWARFUnit; class DataExtractor; -class DeclContextTree; template <typename T> class SmallVectorImpl; -enum class DwarfLinkerClient { Dsymutil, LLD, General }; - -/// AddressesMap represents information about valid addresses used -/// by debug information. Valid addresses are those which points to -/// live code sections. i.e. relocations for these addresses point -/// into sections which would be/are placed into resulting binary. -class AddressesMap { -public: - virtual ~AddressesMap(); - - /// Checks that there are valid relocations against a .debug_info - /// section. - virtual bool hasValidRelocs() = 0; - - /// Checks that the specified DWARF expression operand \p Op references live - /// code section and returns the relocation adjustment value (to get the - /// linked address this value might be added to the source expression operand - /// address). - /// \returns relocation adjustment value or std::nullopt if there is no - /// corresponding live address. 
- virtual std::optional<int64_t> - getExprOpAddressRelocAdjustment(DWARFUnit &U, - const DWARFExpression::Operation &Op, - uint64_t StartOffset, uint64_t EndOffset) = 0; - - /// Checks that the specified subprogram \p DIE references the live code - /// section and returns the relocation adjustment value (to get the linked - /// address this value might be added to the source subprogram address). - /// Allowed kinds of input DIE: DW_TAG_subprogram, DW_TAG_label. - /// \returns relocation adjustment value or std::nullopt if there is no - /// corresponding live address. - virtual std::optional<int64_t> - getSubprogramRelocAdjustment(const DWARFDie &DIE) = 0; - - /// Returns the file name associated to the AddessesMap - virtual std::optional<StringRef> getLibraryInstallName() = 0; - - /// Apply the valid relocations to the buffer \p Data, taking into - /// account that Data is at \p BaseOffset in the .debug_info section. - /// - /// \returns true whether any reloc has been applied. - virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset, - bool IsLittleEndian) = 0; - - /// Check if the linker needs to gather and save relocation info. - virtual bool needToSaveValidRelocs() = 0; - - /// Update and save original relocations located in between StartOffset and - /// EndOffset. LinkedOffset is the value which should be added to the original - /// relocation offset to get new relocation offset in linked binary. - virtual void updateAndSaveValidRelocs(bool IsDWARF5, - uint64_t OriginalUnitOffset, - int64_t LinkedOffset, - uint64_t StartOffset, - uint64_t EndOffset) = 0; - - /// Update the valid relocations that used OriginalUnitOffset as the compile - /// unit offset, and update their values to reflect OutputUnitOffset. - virtual void updateRelocationsWithUnitOffset(uint64_t OriginalUnitOffset, - uint64_t OutputUnitOffset) = 0; - - /// Erases all data. - virtual void clear() = 0; -}; +namespace dwarf_linker { +namespace classic { +class DeclContextTree; using Offset2UnitMap = DenseMap<uint64_t, CompileUnit *>; @@ -117,7 +56,7 @@ struct DebugDieValuePool { /// DwarfEmitter presents interface to generate all debug info tables. class DwarfEmitter { public: - virtual ~DwarfEmitter(); + virtual ~DwarfEmitter() = default; /// Emit section named SecName with data SecData. virtual void emitSectionContents(StringRef SecData, StringRef SecName) = 0; @@ -282,44 +221,6 @@ public: class DwarfStreamer; using UnitListTy = std::vector<std::unique_ptr<CompileUnit>>; -/// This class represents DWARF information for source file -/// and its address map. -class DWARFFile { -public: - using UnloadCallbackTy = std::function<void(StringRef FileName)>; - DWARFFile(StringRef Name, std::unique_ptr<DWARFContext> Dwarf, - std::unique_ptr<AddressesMap> Addresses, - UnloadCallbackTy UnloadFunc = nullptr) - : FileName(Name), Dwarf(std::move(Dwarf)), - Addresses(std::move(Addresses)), UnloadFunc(UnloadFunc) {} - - /// The object file name. - StringRef FileName; - - /// The source DWARF information. - std::unique_ptr<DWARFContext> Dwarf; - - /// Helpful address information(list of valid address ranges, relocations). - std::unique_ptr<AddressesMap> Addresses; - - /// Callback to the module keeping object file to unload. - UnloadCallbackTy UnloadFunc; - - /// Unloads object file and corresponding AddressesMap and Dwarf Context. 
- void unload() { - Addresses.reset(); - Dwarf.reset(); - - if (UnloadFunc) - UnloadFunc(FileName); - } -}; - -typedef std::map<std::string, std::string> swiftInterfacesMap; -typedef std::map<std::string, std::string> objectPrefixMap; - -typedef function_ref<void(const DWARFUnit &Unit)> CompileUnitHandler; - /// The core of the Dwarf linking logic. /// /// The generation of the dwarf information from the object files will be @@ -334,41 +235,20 @@ typedef function_ref<void(const DWARFUnit &Unit)> CompileUnitHandler; /// a variable). These relocations are called ValidRelocs in the /// AddressesInfo and are gathered as a very first step when we start /// processing a object file. -class DWARFLinker { +class DWARFLinker : public DWARFLinkerBase { public: - typedef std::function<void(const Twine &Warning, StringRef Context, - const DWARFDie *DIE)> - messageHandler; - DWARFLinker(messageHandler ErrorHandler, messageHandler WarningHandler, + DWARFLinker(MessageHandlerTy ErrorHandler, MessageHandlerTy WarningHandler, std::function<StringRef(StringRef)> StringsTranslator) - : DwarfLinkerClientID(DwarfLinkerClient::Dsymutil), - StringsTranslator(StringsTranslator), ErrorHandler(ErrorHandler), + : StringsTranslator(StringsTranslator), ErrorHandler(ErrorHandler), WarningHandler(WarningHandler) {} static std::unique_ptr<DWARFLinker> createLinker( - messageHandler ErrorHandler, messageHandler WarningHandler, + MessageHandlerTy ErrorHandler, MessageHandlerTy WarningHandler, std::function<StringRef(StringRef)> StringsTranslator = nullptr) { return std::make_unique<DWARFLinker>(ErrorHandler, WarningHandler, StringsTranslator); } - /// Type of output file. - enum class OutputFileType { - Object, - Assembly, - }; - - /// The kind of accelerator tables we should emit. - enum class AccelTableKind : uint8_t { - Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc. - Pub, ///< .debug_pubnames, .debug_pubtypes - DebugNames ///< .debug_names. - }; - typedef std::function<void(const DWARFFile &File, llvm::StringRef Output)> inputVerificationHandler; - typedef std::function<ErrorOr<DWARFFile &>(StringRef ContainerName, - StringRef Path)> - objFileLoader; - Error createEmitter(const Triple &TheTriple, OutputFileType FileType, raw_pwrite_stream &OutFile); @@ -381,73 +261,82 @@ public: /// /// \pre NoODR, Update options should be set before call to addObjectFile. void addObjectFile( - DWARFFile &File, objFileLoader Loader = nullptr, - CompileUnitHandler OnCUDieLoaded = [](const DWARFUnit &) {}); + DWARFFile &File, ObjFileLoaderTy Loader = nullptr, + CompileUnitHandlerTy OnCUDieLoaded = [](const DWARFUnit &) {}) override; /// Link debug info for added objFiles. Object files are linked all together. - Error link(); + Error link() override; /// A number of methods setting various linking options: /// Allows to generate log of linking process to the standard output. - void setVerbosity(bool Verbose) { Options.Verbose = Verbose; } + void setVerbosity(bool Verbose) override { Options.Verbose = Verbose; } /// Print statistics to standard output. - void setStatistics(bool Statistics) { Options.Statistics = Statistics; } + void setStatistics(bool Statistics) override { + Options.Statistics = Statistics; + } /// Verify the input DWARF. - void setVerifyInputDWARF(bool Verify) { Options.VerifyInputDWARF = Verify; } + void setVerifyInputDWARF(bool Verify) override { + Options.VerifyInputDWARF = Verify; + } /// Do not unique types according to ODR. 
- void setNoODR(bool NoODR) { Options.NoODR = NoODR; } + void setNoODR(bool NoODR) override { Options.NoODR = NoODR; } /// Update index tables only(do not modify rest of DWARF). - void setUpdateIndexTablesOnly(bool Update) { Options.Update = Update; } + void setUpdateIndexTablesOnly(bool Update) override { + Options.Update = Update; + } /// Allow generating valid, but non-deterministic output. - void setAllowNonDeterministicOutput(bool) { /* Nothing to do. */ + void setAllowNonDeterministicOutput(bool) override { /* Nothing to do. */ } /// Set whether to keep the enclosing function for a static variable. - void setKeepFunctionForStatic(bool KeepFunctionForStatic) { + void setKeepFunctionForStatic(bool KeepFunctionForStatic) override { Options.KeepFunctionForStatic = KeepFunctionForStatic; } /// Use specified number of threads for parallel files linking. - void setNumThreads(unsigned NumThreads) { Options.Threads = NumThreads; } + void setNumThreads(unsigned NumThreads) override { + Options.Threads = NumThreads; + } /// Add kind of accelerator tables to be generated. - void addAccelTableKind(AccelTableKind Kind) { + void addAccelTableKind(AccelTableKind Kind) override { assert(!llvm::is_contained(Options.AccelTables, Kind)); Options.AccelTables.emplace_back(Kind); } /// Set prepend path for clang modules. - void setPrependPath(const std::string &Ppath) { Options.PrependPath = Ppath; } + void setPrependPath(StringRef Ppath) override { Options.PrependPath = Ppath; } /// Set estimated objects files amount, for preliminary data allocation. - void setEstimatedObjfilesAmount(unsigned ObjFilesNum) { + void setEstimatedObjfilesAmount(unsigned ObjFilesNum) override { ObjectContexts.reserve(ObjFilesNum); } /// Set verification handler which would be used to report verification /// errors. - void setInputVerificationHandler(inputVerificationHandler Handler) { + void + setInputVerificationHandler(InputVerificationHandlerTy Handler) override { Options.InputVerificationHandler = Handler; } /// Set map for Swift interfaces. - void setSwiftInterfacesMap(swiftInterfacesMap *Map) { + void setSwiftInterfacesMap(SwiftInterfacesMapTy *Map) override { Options.ParseableSwiftInterfaces = Map; } /// Set prefix map for objects. - void setObjectPrefixMap(objectPrefixMap *Map) { + void setObjectPrefixMap(ObjectPrefixMapTy *Map) override { Options.ObjectPrefixMap = Map; } /// Set target DWARF version. - Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) { + Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) override { if ((TargetDWARFVersion < 1) || (TargetDWARFVersion > 5)) return createStringError(std::errc::invalid_argument, "unsupported DWARF version: %d", @@ -619,16 +508,17 @@ private: /// pointing to the module, and a DW_AT_gnu_dwo_id with the module /// hash. bool registerModuleReference(const DWARFDie &CUDie, LinkContext &Context, - objFileLoader Loader, - CompileUnitHandler OnCUDieLoaded, + ObjFileLoaderTy Loader, + CompileUnitHandlerTy OnCUDieLoaded, unsigned Indent = 0); /// Recursively add the debug info in this clang module .pcm /// file (and all the modules imported by it in a bottom-up fashion) /// to ModuleUnits. - Error loadClangModule(objFileLoader Loader, const DWARFDie &CUDie, + Error loadClangModule(ObjFileLoaderTy Loader, const DWARFDie &CUDie, const std::string &PCMFile, LinkContext &Context, - CompileUnitHandler OnCUDieLoaded, unsigned Indent = 0); + CompileUnitHandlerTy OnCUDieLoaded, + unsigned Indent = 0); /// Clone specified Clang module unit \p Unit. 
Error cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit, @@ -911,18 +801,16 @@ private: /// Mapping the PCM filename to the DwoId. StringMap<uint64_t> ClangModules; - DwarfLinkerClient DwarfLinkerClientID; - std::function<StringRef(StringRef)> StringsTranslator = nullptr; /// A unique ID that identifies each compile unit. unsigned UniqueUnitID = 0; // error handler - messageHandler ErrorHandler = nullptr; + MessageHandlerTy ErrorHandler = nullptr; // warning handler - messageHandler WarningHandler = nullptr; + MessageHandlerTy WarningHandler = nullptr; /// linking options struct DWARFLinkerOptions { @@ -958,20 +846,22 @@ private: std::string PrependPath; // input verification handler - inputVerificationHandler InputVerificationHandler = nullptr; + InputVerificationHandlerTy InputVerificationHandler = nullptr; /// A list of all .swiftinterface files referenced by the debug /// info, mapping Module name to path on disk. The entries need to /// be uniqued and sorted and there are only few entries expected /// per compile unit, which is why this is a std::map. /// this is dsymutil specific fag. - swiftInterfacesMap *ParseableSwiftInterfaces = nullptr; + SwiftInterfacesMapTy *ParseableSwiftInterfaces = nullptr; /// A list of remappings to apply to file paths. - objectPrefixMap *ObjectPrefixMap = nullptr; + ObjectPrefixMapTy *ObjectPrefixMap = nullptr; } Options; }; -} // end namespace llvm +} // end of namespace classic +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_DWARFLINKER_DWARFLINKER_H +#endif // LLVM_DWARFLINKER_CLASSIC_DWARFLINKER_H diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h index 08ebd4bc70bc..bfe544946fd9 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H -#define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H +#ifndef LLVM_DWARFLINKER_CLASSIC_DWARFLINKERCOMPILEUNIT_H +#define LLVM_DWARFLINKER_CLASSIC_DWARFLINKERCOMPILEUNIT_H #include "llvm/ADT/AddressRanges.h" #include "llvm/ADT/DenseMap.h" @@ -16,6 +16,8 @@ #include <optional> namespace llvm { +namespace dwarf_linker { +namespace classic { class DeclContext; @@ -327,6 +329,8 @@ private: std::string ClangModuleName; }; -} // end namespace llvm +} // end of namespace classic +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H +#endif // LLVM_DWARFLINKER_CLASSIC_DWARFLINKERCOMPILEUNIT_H diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h index fb02b0fc1b4d..b00f68c3be84 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKER_DWARFLINKERDECLCONTEXT_H -#define LLVM_DWARFLINKER_DWARFLINKERDECLCONTEXT_H +#ifndef LLVM_DWARFLINKER_CLASSIC_DWARFLINKERDECLCONTEXT_H +#define LLVM_DWARFLINKER_CLASSIC_DWARFLINKERDECLCONTEXT_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" @@ -21,6 +21,8 @@ #include <atomic> namespace llvm { +namespace dwarf_linker { +namespace classic { class CompileUnit; struct DeclMapInfo; @@ -184,6 +186,8 
@@ struct DeclMapInfo : private DenseMapInfo<DeclContext *> { } }; -} // end namespace llvm +} // end of namespace classic +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_DWARFLINKER_DWARFLINKERDECLCONTEXT_H +#endif // LLVM_DWARFLINKER_CLASSIC_DWARFLINKERDECLCONTEXT_H diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h index 18eb7277bfa2..f010c348f121 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFStreamer.h @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H -#define LLVM_DWARFLINKER_DWARFSTREAMER_H +#ifndef LLVM_DWARFLINKER_CLASSIC_DWARFSTREAMER_H +#define LLVM_DWARFLINKER_CLASSIC_DWARFSTREAMER_H +#include "DWARFLinker.h" #include "llvm/BinaryFormat/Swift.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/DWARFLinker/DWARFLinker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" @@ -23,6 +23,12 @@ namespace llvm { template <typename DataT> class AccelTable; +class MCCodeEmitter; +class DWARFDebugMacro; + +namespace dwarf_linker { +namespace classic { + /// User of DwarfStreamer should call initialization code /// for AsmPrinter: /// @@ -31,21 +37,19 @@ template <typename DataT> class AccelTable; /// InitializeAllTargets(); /// InitializeAllAsmPrinters(); -class MCCodeEmitter; -class DWARFDebugMacro; - /// The Dwarf streaming logic. /// /// All interactions with the MC layer that is used to build the debug /// information binary representation are handled in this class. class DwarfStreamer : public DwarfEmitter { public: - DwarfStreamer(DWARFLinker::OutputFileType OutFileType, + DwarfStreamer(DWARFLinkerBase::OutputFileType OutFileType, raw_pwrite_stream &OutFile, std::function<StringRef(StringRef Input)> Translator, - DWARFLinker::messageHandler Warning) + DWARFLinkerBase::MessageHandlerTy Warning) : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator), WarningHandler(Warning) {} + virtual ~DwarfStreamer() = default; Error init(Triple TheTriple, StringRef Swift5ReflectionSegmentName); @@ -310,9 +314,11 @@ private: const CompileUnit &Unit, const std::vector<CompileUnit::AccelInfo> &Names); - DWARFLinker::messageHandler WarningHandler = nullptr; + DWARFLinkerBase::MessageHandlerTy WarningHandler = nullptr; }; -} // end namespace llvm +} // end of namespace classic +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_DWARFLINKER_DWARFSTREAMER_H +#endif // LLVM_DWARFLINKER_CLASSIC_DWARFSTREAMER_H diff --git a/llvm/include/llvm/DWARFLinkerParallel/DWARFFile.h b/llvm/include/llvm/DWARFLinker/DWARFFile.h index c320530569bb..c1d0fd87c7d7 100644 --- a/llvm/include/llvm/DWARFLinkerParallel/DWARFFile.h +++ b/llvm/include/llvm/DWARFLinker/DWARFFile.h @@ -6,18 +6,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKERPARALLEL_DWARFFILE_H -#define LLVM_DWARFLINKERPARALLEL_DWARFFILE_H +#ifndef LLVM_DWARFLINKER_DWARFFILE_H +#define LLVM_DWARFLINKER_DWARFFILE_H +#include "AddressesMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DWARFLinkerParallel/AddressesMap.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/Support/Endian.h" #include <functional> #include <memory> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { /// This class represents DWARF information for 
source file /// and it's address map. @@ -29,7 +28,9 @@ public: DWARFFile(StringRef Name, std::unique_ptr<DWARFContext> Dwarf, std::unique_ptr<AddressesMap> Addresses, - UnloadCallbackTy UnloadFunc = nullptr); + UnloadCallbackTy UnloadFunc = nullptr) + : FileName(Name), Dwarf(std::move(Dwarf)), + Addresses(std::move(Addresses)), UnloadFunc(UnloadFunc) {} /// Object file name. StringRef FileName; @@ -53,7 +54,7 @@ public: } }; -} // end namespace dwarflinker_parallel +} // namespace dwarf_linker } // end namespace llvm -#endif // LLVM_DWARFLINKERPARALLEL_DWARFFILE_H +#endif // LLVM_DWARFLINKER_DWARFFILE_H diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerBase.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerBase.h new file mode 100644 index 000000000000..626fb53d90f9 --- /dev/null +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerBase.h @@ -0,0 +1,100 @@ +//===- DWARFLinkerBase.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DWARFLINKER_DWARFLINKERBASE_H +#define LLVM_DWARFLINKER_DWARFLINKERBASE_H +#include "AddressesMap.h" +#include "DWARFFile.h" +#include "llvm/ADT/AddressRanges.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include <map> +namespace llvm { +class DWARFUnit; + +namespace dwarf_linker { + +/// The base interface for DWARFLinker implementations. +class DWARFLinkerBase { +public: + virtual ~DWARFLinkerBase() = default; + using MessageHandlerTy = std::function<void( + const Twine &Warning, StringRef Context, const DWARFDie *DIE)>; + using ObjFileLoaderTy = std::function<ErrorOr<DWARFFile &>( + StringRef ContainerName, StringRef Path)>; + using InputVerificationHandlerTy = + std::function<void(const DWARFFile &File, llvm::StringRef Output)>; + using ObjectPrefixMapTy = std::map<std::string, std::string>; + using CompileUnitHandlerTy = function_ref<void(const DWARFUnit &Unit)>; + using TranslatorFuncTy = std::function<StringRef(StringRef)>; + using SwiftInterfacesMapTy = std::map<std::string, std::string>; + /// Type of output file. + enum class OutputFileType : uint8_t { + Object, + Assembly, + }; + /// The kind of accelerator tables to be emitted. + enum class AccelTableKind : uint8_t { + Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc. + Pub, ///< .debug_pubnames, .debug_pubtypes + DebugNames ///< .debug_names. + }; + /// Add an object file to be linked. Pre-load compile unit die. Call + /// \p OnCUDieLoaded for each compile unit die. If \p File has reference to + /// a Clang module and UpdateIndexTablesOnly == false then the module is be + /// pre-loaded by \p Loader. + /// + /// \pre a call to setNoODR(true) and/or setUpdateIndexTablesOnly(bool Update) + /// must be made when required. + virtual void addObjectFile( + DWARFFile &File, ObjFileLoaderTy Loader = nullptr, + CompileUnitHandlerTy OnCUDieLoaded = [](const DWARFUnit &) {}) = 0; + /// Link the debug info for all object files added through calls to + /// addObjectFile. 
+ virtual Error link() = 0; + /// A number of methods setting various linking options: + /// Enable logging to standard output. + virtual void setVerbosity(bool Verbose) = 0; + /// Print statistics to standard output. + virtual void setStatistics(bool Statistics) = 0; + /// Verify the input DWARF. + virtual void setVerifyInputDWARF(bool Verify) = 0; + /// Do not unique types according to ODR. + virtual void setNoODR(bool NoODR) = 0; + /// Update index tables only (do not modify rest of DWARF). + virtual void setUpdateIndexTablesOnly(bool Update) = 0; + /// Allows generating non-deterministic output in exchange for more + /// parallelism. + virtual void setAllowNonDeterministicOutput(bool) = 0; + /// Set whether to keep the enclosing function for a static variable. + virtual void setKeepFunctionForStatic(bool KeepFunctionForStatic) = 0; + /// Use specified number of threads for parallel files linking. + virtual void setNumThreads(unsigned NumThreads) = 0; + /// Add kind of accelerator tables to be generated. + virtual void addAccelTableKind(AccelTableKind Kind) = 0; + /// Set prepend path for clang modules. + virtual void setPrependPath(StringRef Ppath) = 0; + /// Set estimated objects files amount, for preliminary data allocation. + virtual void setEstimatedObjfilesAmount(unsigned ObjFilesNum) = 0; + /// Set verification handler used to report verification errors. + virtual void + setInputVerificationHandler(InputVerificationHandlerTy Handler) = 0; + /// Set map for Swift interfaces. + virtual void setSwiftInterfacesMap(SwiftInterfacesMapTy *Map) = 0; + /// Set prefix map for objects. + virtual void setObjectPrefixMap(ObjectPrefixMapTy *Map) = 0; + /// Set target DWARF version. + virtual Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) = 0; +}; +} // end namespace dwarf_linker +} // end namespace llvm +#endif // LLVM_DWARFLINKER_DWARFLINKERBASE_H diff --git a/llvm/include/llvm/DWARFLinkerParallel/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/Parallel/DWARFLinker.h index c16c94d65c2f..c38a9906940e 100644 --- a/llvm/include/llvm/DWARFLinkerParallel/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/Parallel/DWARFLinker.h @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKERPARALLEL_DWARFLINKER_H -#define LLVM_DWARFLINKERPARALLEL_DWARFLINKER_H +#ifndef LLVM_DWARFLINKER_PARALLEL_DWARFLINKER_H +#define LLVM_DWARFLINKER_PARALLEL_DWARFLINKER_H #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/DWARFLinkerParallel/DWARFFile.h" +#include "llvm/DWARFLinker/DWARFFile.h" +#include "llvm/DWARFLinker/DWARFLinkerBase.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/MC/MCDwarf.h" @@ -85,7 +86,8 @@ /// namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// ExtraDwarfEmitter allows adding extra data to the DWARFLinker output. /// The finish() method should be called after all extra data are emitted. @@ -111,31 +113,8 @@ public: virtual AsmPrinter &getAsmPrinter() const = 0; }; -class DWARFLinker { +class DWARFLinker : public DWARFLinkerBase { public: - /// Type of output file. - enum class OutputFileType { - Object, - Assembly, - }; - - /// The kind of accelerator tables we should emit. - enum class AccelTableKind : uint8_t { - Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc. - Pub, ///< .debug_pubnames, .debug_pubtypes - DebugNames ///< .debug_names. 
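Note on the DWARFLinkerBase interface above: it collects the option setters shared by the classic and parallel linkers so drivers can be written against one API. A hedged sketch of driving the relocated classic linker through it; the handlers, triple, output stream and DWARFFile are assumed to be provided by the caller, and error handling is reduced to early returns.

#include "llvm/DWARFLinker/Classic/DWARFLinker.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"

using namespace llvm;
using namespace llvm::dwarf_linker;

// Illustrative driver; not taken from an in-tree tool.
static Error linkOneFile(DWARFLinkerBase::MessageHandlerTy ErrHandler,
                         DWARFLinkerBase::MessageHandlerTy WarnHandler,
                         const Triple &TheTriple, raw_pwrite_stream &OutFile,
                         DWARFFile &File) {
  auto Linker = classic::DWARFLinker::createLinker(ErrHandler, WarnHandler);
  Linker->setNumThreads(1);
  Linker->addAccelTableKind(DWARFLinkerBase::AccelTableKind::DebugNames);
  if (Error E = Linker->createEmitter(
          TheTriple, DWARFLinkerBase::OutputFileType::Object, OutFile))
    return E;
  Linker->addObjectFile(File);
  return Linker->link();
}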
- }; - - using MessageHandlerTy = std::function<void( - const Twine &Warning, StringRef Context, const DWARFDie *DIE)>; - using ObjFileLoaderTy = std::function<ErrorOr<DWARFFile &>( - StringRef ContainerName, StringRef Path)>; - using InputVerificationHandlerTy = std::function<void(const DWARFFile &File, llvm::StringRef Output)>; - using ObjectPrefixMapTy = std::map<std::string, std::string>; - using CompileUnitHandlerTy = function_ref<void(const DWARFUnit &Unit)>; - using TranslatorFuncTy = std::function<StringRef(StringRef)>; - using SwiftInterfacesMapTy = std::map<std::string, std::string>; - virtual ~DWARFLinker() = default; /// Creates dwarf linker instance. @@ -149,75 +128,10 @@ public: /// Returns previously created dwarf emitter. May be nullptr. virtual ExtraDwarfEmitter *getEmitter() = 0; - - /// Add object file to be linked. Pre-load compile unit die. Call - /// \p OnCUDieLoaded for each compile unit die. If specified \p File - /// has reference to the Clang module then such module would be - /// pre-loaded by \p Loader for !Update case. - /// - /// \pre NoODR, Update options should be set before call to addObjectFile. - virtual void addObjectFile( - DWARFFile &File, ObjFileLoaderTy Loader = nullptr, - CompileUnitHandlerTy OnCUDieLoaded = [](const DWARFUnit &) {}) = 0; - - /// Link debug info for added files. - virtual Error link() = 0; - - /// \defgroup Methods setting various linking options: - /// - /// @{ - - /// Allows to generate log of linking process to the standard output. - virtual void setVerbosity(bool Verbose) = 0; - - /// Print statistics to standard output. - virtual void setStatistics(bool Statistics) = 0; - - /// Verify the input DWARF. - virtual void setVerifyInputDWARF(bool Verify) = 0; - - /// Do not unique types according to ODR. - virtual void setNoODR(bool NoODR) = 0; - - /// Update index tables only(do not modify rest of DWARF). - virtual void setUpdateIndexTablesOnly(bool UpdateIndexTablesOnly) = 0; - - /// Allow generating valid, but non-deterministic output. - virtual void - setAllowNonDeterministicOutput(bool AllowNonDeterministicOutput) = 0; - - /// Set to keep the enclosing function for a static variable. - virtual void setKeepFunctionForStatic(bool KeepFunctionForStatic) = 0; - - /// Use specified number of threads for parallel files linking. - virtual void setNumThreads(unsigned NumThreads) = 0; - - /// Add kind of accelerator tables to be generated. - virtual void addAccelTableKind(AccelTableKind Kind) = 0; - - /// Set prepend path for clang modules. - virtual void setPrependPath(const std::string &Ppath) = 0; - - /// Set estimated objects files amount, for preliminary data allocation. - virtual void setEstimatedObjfilesAmount(unsigned ObjFilesNum) = 0; - - /// Set verification handler which would be used to report verification - /// errors. - virtual void - setInputVerificationHandler(InputVerificationHandlerTy Handler) = 0; - - /// Set map for Swift interfaces. - virtual void setSwiftInterfacesMap(SwiftInterfacesMapTy *Map) = 0; - - /// Set prefix map for objects. - virtual void setObjectPrefixMap(ObjectPrefixMapTy *Map) = 0; - - /// Set target DWARF version. 
- virtual Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) = 0; - /// @} }; -} // end namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_DWARFLINKERPARALLEL_DWARFLINKER_H +#endif // LLVM_DWARFLINKER_PARALLEL_DWARFLINKER_H diff --git a/llvm/include/llvm/DWARFLinkerParallel/StringPool.h b/llvm/include/llvm/DWARFLinker/StringPool.h index e55909f34311..d0f4e211fac3 100644 --- a/llvm/include/llvm/DWARFLinkerParallel/StringPool.h +++ b/llvm/include/llvm/DWARFLinker/StringPool.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKERPARALLEL_STRINGPOOL_H -#define LLVM_DWARFLINKERPARALLEL_STRINGPOOL_H +#ifndef LLVM_DWARFLINKER_STRINGPOOL_H +#define LLVM_DWARFLINKER_STRINGPOOL_H #include "llvm/ADT/ConcurrentHashtable.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" @@ -16,7 +16,7 @@ #include <string_view> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { /// StringEntry keeps data of the string: the length, external offset /// and a string body which is placed right after StringEntry. @@ -41,35 +41,38 @@ public: /// \returns newly created object of KeyDataTy type. static inline StringEntry * - create(const StringRef &Key, parallel::PerThreadBumpPtrAllocator &Allocator) { + create(const StringRef &Key, + llvm::parallel::PerThreadBumpPtrAllocator &Allocator) { return StringEntry::create(Key, Allocator); } }; class StringPool : public ConcurrentHashTableByPtr<StringRef, StringEntry, - parallel::PerThreadBumpPtrAllocator, + llvm::parallel::PerThreadBumpPtrAllocator, StringPoolEntryInfo> { public: StringPool() : ConcurrentHashTableByPtr<StringRef, StringEntry, - parallel::PerThreadBumpPtrAllocator, + llvm::parallel::PerThreadBumpPtrAllocator, StringPoolEntryInfo>(Allocator) {} StringPool(size_t InitialSize) : ConcurrentHashTableByPtr<StringRef, StringEntry, - parallel::PerThreadBumpPtrAllocator, + llvm::parallel::PerThreadBumpPtrAllocator, StringPoolEntryInfo>(Allocator, InitialSize) {} - parallel::PerThreadBumpPtrAllocator &getAllocatorRef() { return Allocator; } + llvm::parallel::PerThreadBumpPtrAllocator &getAllocatorRef() { + return Allocator; + } void clear() { Allocator.Reset(); } private: - parallel::PerThreadBumpPtrAllocator Allocator; + llvm::parallel::PerThreadBumpPtrAllocator Allocator; }; -} // end of namespace dwarflinker_parallel +} // namespace dwarf_linker } // end namespace llvm -#endif // LLVM_DWARFLINKERPARALLEL_STRINGPOOL_H +#endif // LLVM_DWARFLINKER_STRINGPOOL_H diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h index 6b1b2ae6d7e0..df862f60cb2f 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -18,11 +18,16 @@ namespace llvm { class raw_ostream; + +namespace dwarf_linker { +namespace classic { class DwarfStreamer; +} +} // namespace dwarf_linker class DWARFDebugMacro { - friend DwarfStreamer; - friend dwarflinker_parallel::CompileUnit; + friend dwarf_linker::classic::DwarfStreamer; + friend dwarf_linker::parallel::CompileUnit; /// DWARFv5 section 6.3.1 Macro Information Header. 
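Note on the relocated StringPool above: it is a concurrent string-interning table backed by a per-thread bump allocator. A small sketch of the assumed usage; the exact shape of ConcurrentHashTableByPtr::insert (canonical entry plus a was-inserted flag) is an assumption here, not documented by the hunk.

#include "llvm/DWARFLinker/StringPool.h"

using namespace llvm::dwarf_linker;

// Hypothetical interning helper built on the pool.
static StringEntry *intern(StringPool &Pool, llvm::StringRef S) {
  auto [Entry, WasNew] = Pool.insert(S); // assumed insert() signature
  (void)WasNew;
  return Entry;
}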
enum HeaderFlagMask { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 7084081ce61a..f20e71781f46 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -43,9 +43,11 @@ class DWARFObject; class raw_ostream; struct DIDumpOptions; struct DWARFSection; -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { class CompileUnit; } +} // namespace dwarf_linker /// Base class describing the header of any kind of "unit." Some information /// is specific to certain unit types. We separate this class out so we can @@ -256,7 +258,7 @@ class DWARFUnit { std::shared_ptr<DWARFUnit> DWO; protected: - friend dwarflinker_parallel::CompileUnit; + friend dwarf_linker::parallel::CompileUnit; /// Return the index of a \p Die entry inside the unit's DIE vector. /// diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h index 70cfc1418f0c..fe129603c078 100644 --- a/llvm/include/llvm/Demangle/Demangle.h +++ b/llvm/include/llvm/Demangle/Demangle.h @@ -32,7 +32,7 @@ enum : int { /// Returns a non-NULL pointer to a NUL-terminated C style string /// that should be explicitly freed, if successful. Otherwise, may return /// nullptr if mangled_name is not a valid mangling or is nullptr. -char *itaniumDemangle(std::string_view mangled_name); +char *itaniumDemangle(std::string_view mangled_name, bool ParseParams = true); enum MSDemangleFlags { MSDF_None = 0, @@ -68,7 +68,8 @@ char *dlangDemangle(std::string_view MangledName); std::string demangle(std::string_view MangledName); bool nonMicrosoftDemangle(std::string_view MangledName, std::string &Result, - bool CanHaveLeadingDot = true); + bool CanHaveLeadingDot = true, + bool ParseParams = true); /// "Partial" demangler. This supports demangling a string into an AST /// (typically an intermediate stage in itaniumDemangle) and querying certain diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index e0ff035d47cf..06956f47c1f0 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -2793,7 +2793,7 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser { Node *parseClassEnumType(); Node *parseQualifiedType(); - Node *parseEncoding(); + Node *parseEncoding(bool ParseParams = true); bool parseCallOffset(); Node *parseSpecialName(); @@ -2910,7 +2910,7 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser { Node *parseDestructorName(); /// Top-level entry point into the parser. - Node *parse(); + Node *parse(bool ParseParams = true); }; const char* parse_discriminator(const char* first, const char* last); @@ -5404,7 +5404,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() { // ::= <data name> // ::= <special-name> template <typename Derived, typename Alloc> -Node *AbstractManglingParser<Derived, Alloc>::parseEncoding() { +Node *AbstractManglingParser<Derived, Alloc>::parseEncoding(bool ParseParams) { // The template parameters of an encoding are unrelated to those of the // enclosing context. SaveTemplateParams SaveTemplateParamsScope(this); @@ -5430,6 +5430,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseEncoding() { if (IsEndOfEncoding()) return Name; + // ParseParams may be false at the top level only, when called from parse(). 
+ // For example in the mangled name _Z3fooILZ3BarEET_f, ParseParams may be + // false when demangling 3fooILZ3BarEET_f but is always true when demangling + // 3Bar. + if (!ParseParams) { + while (consume()) + ; + return Name; + } + Node *Attrs = nullptr; if (consumeIf("Ua9enable_ifI")) { size_t BeforeArgs = Names.size(); @@ -5894,9 +5904,9 @@ AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) { // extension ::= ___Z <encoding> _block_invoke<decimal-digit>+ // extension ::= ___Z <encoding> _block_invoke_<decimal-digit>+ template <typename Derived, typename Alloc> -Node *AbstractManglingParser<Derived, Alloc>::parse() { +Node *AbstractManglingParser<Derived, Alloc>::parse(bool ParseParams) { if (consumeIf("_Z") || consumeIf("__Z")) { - Node *Encoding = getDerived().parseEncoding(); + Node *Encoding = getDerived().parseEncoding(ParseParams); if (Encoding == nullptr) return nullptr; if (look() == '.') { @@ -5910,7 +5920,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parse() { } if (consumeIf("___Z") || consumeIf("____Z")) { - Node *Encoding = getDerived().parseEncoding(); + Node *Encoding = getDerived().parseEncoding(ParseParams); if (Encoding == nullptr || !consumeIf("_block_invoke")) return nullptr; bool RequireNumber = consumeIf('_'); diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 8a019492c12d..f4d2f56c34d9 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h" @@ -1923,6 +1924,10 @@ void visitExistingEdges(LinkGraph &G, VisitorTs &&...Vs) { Expected<std::unique_ptr<LinkGraph>> createLinkGraphFromObject(MemoryBufferRef ObjectBuffer); +/// Create a \c LinkGraph defining the given absolute symbols. +std::unique_ptr<LinkGraph> absoluteSymbolsLinkGraph(const Triple &TT, + orc::SymbolMap Symbols); + /// Link the given graph. 
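Note on the Demangle hunks above: itaniumDemangle (and nonMicrosoftDemangle) gain a ParseParams flag so callers can stop after the name encoding instead of parsing the parameter list. A hedged usage sketch; the exact text produced with ParseParams=false is not verified here.

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  // Existing behaviour: full demangling, parameters included ("foo(int)").
  char *Full = llvm::itaniumDemangle("_Z3fooi");
  // New flag: skip the parameter list, presumably for callers that only need
  // the function name (e.g. symbolization).
  char *NameOnly = llvm::itaniumDemangle("_Z3fooi", /*ParseParams=*/false);
  if (Full)
    std::printf("%s\n", Full);
  if (NameOnly)
    std::printf("%s\n", NameOnly);
  std::free(Full);     // results are malloc'ed and must be freed by the caller
  std::free(NameOnly);
  return 0;
}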
void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index ba164c6b629e..6a9bcf712169 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -1210,14 +1210,13 @@ private: SymbolTableEntry() = default; SymbolTableEntry(JITSymbolFlags Flags) : Flags(Flags), State(static_cast<uint8_t>(SymbolState::NeverSearched)), - MaterializerAttached(false), PendingRemoval(false) {} + MaterializerAttached(false) {} ExecutorAddr getAddress() const { return Addr; } JITSymbolFlags getFlags() const { return Flags; } SymbolState getState() const { return static_cast<SymbolState>(State); } bool hasMaterializerAttached() const { return MaterializerAttached; } - bool isPendingRemoval() const { return PendingRemoval; } void setAddress(ExecutorAddr Addr) { this->Addr = Addr; } void setFlags(JITSymbolFlags Flags) { this->Flags = Flags; } @@ -1231,18 +1230,13 @@ private: this->MaterializerAttached = MaterializerAttached; } - void setPendingRemoval(bool PendingRemoval) { - this->PendingRemoval = PendingRemoval; - } - ExecutorSymbolDef getSymbol() const { return {Addr, Flags}; } private: ExecutorAddr Addr; JITSymbolFlags Flags; - uint8_t State : 6; + uint8_t State : 7; uint8_t MaterializerAttached : 1; - uint8_t PendingRemoval : 1; }; using SymbolTable = DenseMap<SymbolStringPtr, SymbolTableEntry>; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h index 63797edec89e..e56afe4fe656 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h @@ -25,6 +25,7 @@ class ExecutorProcessControl; class EPCDynamicLibrarySearchGenerator : public DefinitionGenerator { public: using SymbolPredicate = unique_function<bool(const SymbolStringPtr &)>; + using AddAbsoluteSymbolsFn = unique_function<Error(JITDylib &, SymbolMap)>; /// Create a DynamicLibrarySearchGenerator that searches for symbols in the /// library with the given handle. @@ -32,24 +33,31 @@ public: /// If the Allow predicate is given then only symbols matching the predicate /// will be searched for. If the predicate is not given then all symbols will /// be searched for. - EPCDynamicLibrarySearchGenerator(ExecutionSession &ES, - tpctypes::DylibHandle H, - SymbolPredicate Allow = SymbolPredicate()) - : EPC(ES.getExecutorProcessControl()), H(H), Allow(std::move(Allow)) {} + /// + /// If \p AddAbsoluteSymbols is provided, it is used to add the symbols to the + /// \c JITDylib; otherwise it uses JD.define(absoluteSymbols(...)). + EPCDynamicLibrarySearchGenerator( + ExecutionSession &ES, tpctypes::DylibHandle H, + SymbolPredicate Allow = SymbolPredicate(), + AddAbsoluteSymbolsFn AddAbsoluteSymbols = nullptr) + : EPC(ES.getExecutorProcessControl()), H(H), Allow(std::move(Allow)), + AddAbsoluteSymbols(std::move(AddAbsoluteSymbols)) {} /// Permanently loads the library at the given path and, on success, returns /// a DynamicLibrarySearchGenerator that will search it for symbol definitions /// in the library. On failure returns the reason the library failed to load. 
static Expected<std::unique_ptr<EPCDynamicLibrarySearchGenerator>> Load(ExecutionSession &ES, const char *LibraryPath, - SymbolPredicate Allow = SymbolPredicate()); + SymbolPredicate Allow = SymbolPredicate(), + AddAbsoluteSymbolsFn AddAbsoluteSymbols = nullptr); /// Creates a EPCDynamicLibrarySearchGenerator that searches for symbols in /// the target process. static Expected<std::unique_ptr<EPCDynamicLibrarySearchGenerator>> GetForTargetProcess(ExecutionSession &ES, - SymbolPredicate Allow = SymbolPredicate()) { - return Load(ES, nullptr, std::move(Allow)); + SymbolPredicate Allow = SymbolPredicate(), + AddAbsoluteSymbolsFn AddAbsoluteSymbols = nullptr) { + return Load(ES, nullptr, std::move(Allow), std::move(AddAbsoluteSymbols)); } Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, @@ -60,6 +68,7 @@ private: ExecutorProcessControl &EPC; tpctypes::DylibHandle H; SymbolPredicate Allow; + AddAbsoluteSymbolsFn AddAbsoluteSymbols; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h index 02e580c86f54..6ee2deef04d0 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h @@ -19,6 +19,7 @@ #define LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" namespace llvm { @@ -49,11 +50,11 @@ public: Expected<tpctypes::DylibHandle> open(StringRef Path, uint64_t Mode); /// Looks up symbols within the given dylib. - Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H, - const SymbolLookupSet &Lookup); + Expected<std::vector<ExecutorSymbolDef>> + lookup(tpctypes::DylibHandle H, const SymbolLookupSet &Lookup); /// Looks up symbols within the given dylib. - Expected<std::vector<ExecutorAddr>> + Expected<std::vector<ExecutorSymbolDef>> lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &Lookup); private: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index 6a43376a5bd9..f7c286bec778 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -216,6 +216,7 @@ private: class DynamicLibrarySearchGenerator : public DefinitionGenerator { public: using SymbolPredicate = std::function<bool(const SymbolStringPtr &)>; + using AddAbsoluteSymbolsFn = unique_function<Error(JITDylib &, SymbolMap)>; /// Create a DynamicLibrarySearchGenerator that searches for symbols in the /// given sys::DynamicLibrary. @@ -223,22 +224,30 @@ public: /// If the Allow predicate is given then only symbols matching the predicate /// will be searched for. If the predicate is not given then all symbols will /// be searched for. - DynamicLibrarySearchGenerator(sys::DynamicLibrary Dylib, char GlobalPrefix, - SymbolPredicate Allow = SymbolPredicate()); + /// + /// If \p AddAbsoluteSymbols is provided, it is used to add the symbols to the + /// \c JITDylib; otherwise it uses JD.define(absoluteSymbols(...)). 
+ DynamicLibrarySearchGenerator( + sys::DynamicLibrary Dylib, char GlobalPrefix, + SymbolPredicate Allow = SymbolPredicate(), + AddAbsoluteSymbolsFn AddAbsoluteSymbols = nullptr); /// Permanently loads the library at the given path and, on success, returns /// a DynamicLibrarySearchGenerator that will search it for symbol definitions /// in the library. On failure returns the reason the library failed to load. static Expected<std::unique_ptr<DynamicLibrarySearchGenerator>> Load(const char *FileName, char GlobalPrefix, - SymbolPredicate Allow = SymbolPredicate()); + SymbolPredicate Allow = SymbolPredicate(), + AddAbsoluteSymbolsFn AddAbsoluteSymbols = nullptr); /// Creates a DynamicLibrarySearchGenerator that searches for symbols in /// the current process. static Expected<std::unique_ptr<DynamicLibrarySearchGenerator>> GetForCurrentProcess(char GlobalPrefix, - SymbolPredicate Allow = SymbolPredicate()) { - return Load(nullptr, GlobalPrefix, std::move(Allow)); + SymbolPredicate Allow = SymbolPredicate(), + AddAbsoluteSymbolsFn AddAbsoluteSymbols = nullptr) { + return Load(nullptr, GlobalPrefix, std::move(Allow), + std::move(AddAbsoluteSymbols)); } Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, @@ -248,6 +257,7 @@ public: private: sys::DynamicLibrary Dylib; SymbolPredicate Allow; + AddAbsoluteSymbolsFn AddAbsoluteSymbols; char GlobalPrefix; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h index 5c58a7255ebd..68ccdf83bd12 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h @@ -15,6 +15,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" namespace llvm { namespace orc { @@ -48,6 +49,63 @@ private: JITSymbolFlags Flags; }; +namespace shared { + +using SPSJITSymbolFlags = + SPSTuple<JITSymbolFlags::UnderlyingType, JITSymbolFlags::TargetFlagsType>; + +/// SPS serializatior for JITSymbolFlags. +template <> class SPSSerializationTraits<SPSJITSymbolFlags, JITSymbolFlags> { + using FlagsArgList = SPSJITSymbolFlags::AsArgList; + +public: + static size_t size(const JITSymbolFlags &F) { + return FlagsArgList::size(F.getRawFlagsValue(), F.getTargetFlags()); + } + + static bool serialize(SPSOutputBuffer &BOB, const JITSymbolFlags &F) { + return FlagsArgList::serialize(BOB, F.getRawFlagsValue(), + F.getTargetFlags()); + } + + static bool deserialize(SPSInputBuffer &BIB, JITSymbolFlags &F) { + JITSymbolFlags::UnderlyingType RawFlags; + JITSymbolFlags::TargetFlagsType TargetFlags; + if (!FlagsArgList::deserialize(BIB, RawFlags, TargetFlags)) + return false; + F = JITSymbolFlags{static_cast<JITSymbolFlags::FlagNames>(RawFlags), + TargetFlags}; + return true; + } +}; + +using SPSExecutorSymbolDef = SPSTuple<SPSExecutorAddr, SPSJITSymbolFlags>; + +/// SPS serializatior for ExecutorSymbolDef. 
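Note on the search-generator hunks above: both EPCDynamicLibrarySearchGenerator and DynamicLibrarySearchGenerator gain an optional AddAbsoluteSymbolsFn hook controlling how found symbols are recorded in the JITDylib, with JD.define(absoluteSymbols(...)) remaining the default. A hedged sketch of supplying the hook to the in-process generator; the surrounding session setup is assumed to exist.

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

using namespace llvm;
using namespace llvm::orc;

// Illustrative only: reproduce the documented default behaviour explicitly so
// the hook has an obvious place to add filtering or logging.
static Error addProcessSymbols(JITDylib &JD, char GlobalPrefix) {
  auto Gen = DynamicLibrarySearchGenerator::GetForCurrentProcess(
      GlobalPrefix, /*Allow=*/nullptr,
      /*AddAbsoluteSymbols=*/[](JITDylib &D, SymbolMap Symbols) -> Error {
        return D.define(absoluteSymbols(std::move(Symbols)));
      });
  if (!Gen)
    return Gen.takeError();
  JD.addGenerator(std::move(*Gen));
  return Error::success();
}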
+template <> +class SPSSerializationTraits<SPSExecutorSymbolDef, ExecutorSymbolDef> { + using DefArgList = SPSExecutorSymbolDef::AsArgList; + +public: + static size_t size(const ExecutorSymbolDef &ESD) { + return DefArgList::size(ESD.getAddress(), ESD.getFlags()); + } + + static bool serialize(SPSOutputBuffer &BOB, const ExecutorSymbolDef &ESD) { + return DefArgList::serialize(BOB, ESD.getAddress(), ESD.getFlags()); + } + + static bool deserialize(SPSInputBuffer &BIB, ExecutorSymbolDef &ESD) { + ExecutorAddr Addr; + JITSymbolFlags Flags; + if (!DefArgList::deserialize(BIB, Addr, Flags)) + return false; + ESD = ExecutorSymbolDef{Addr, Flags}; + return true; + } +}; + +} // End namespace shared. } // End namespace orc. } // End namespace llvm. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h index 8e336ca03eaf..0c549bcbf013 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" #include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" @@ -54,7 +55,7 @@ using SPSSimpleExecutorDylibManagerOpenSignature = shared::SPSString, uint64_t); using SPSSimpleExecutorDylibManagerLookupSignature = - shared::SPSExpected<shared::SPSSequence<shared::SPSExecutorAddr>>( + shared::SPSExpected<shared::SPSSequence<shared::SPSExecutorSymbolDef>>( shared::SPSExecutorAddr, shared::SPSExecutorAddr, shared::SPSRemoteSymbolLookupSet); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h index 7322674559c9..e91d8d926d88 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h @@ -19,6 +19,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Shared/AllocationActions.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h" #include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" @@ -113,7 +114,7 @@ struct PointerWrite { /// A handle used to represent a loaded dylib in the target process. 
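Note on the ORC lookup hunks above and below: dylib lookups now return ExecutorSymbolDef (address plus JITSymbolFlags) instead of bare ExecutorAddrs, and the new SPS serializer lets those results cross the EPC wire. A small sketch of consuming such a result; the vector is assumed to come from one of the lookup() overloads shown above.

#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
#include <vector>

using namespace llvm::orc;

// Illustrative consumer: with flags available, callers can distinguish
// callable symbols from data without a second query.
static unsigned countCallable(const std::vector<ExecutorSymbolDef> &Result) {
  unsigned N = 0;
  for (const ExecutorSymbolDef &Sym : Result)
    if (Sym.getFlags().isCallable() && Sym.getAddress())
      ++N;
  return N;
}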
using DylibHandle = ExecutorAddr; -using LookupResult = std::vector<ExecutorAddr>; +using LookupResult = std::vector<ExecutorSymbolDef>; } // end namespace tpctypes diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h index 99175d796974..9f91a64e95ce 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h @@ -16,6 +16,32 @@ #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include <cstdint> +// Keep in sync with gdb/gdb/jit.h +extern "C" { + +typedef enum { + JIT_NOACTION = 0, + JIT_REGISTER_FN, + JIT_UNREGISTER_FN +} jit_actions_t; + +struct jit_code_entry { + struct jit_code_entry *next_entry; + struct jit_code_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +}; + +struct jit_descriptor { + uint32_t version; + // This should be jit_actions_t, but we want to be specific about the + // bit-width. + uint32_t action_flag; + struct jit_code_entry *relevant_entry; + struct jit_code_entry *first_entry; +}; +} + extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h index fd4504cfb7fb..00fd84e3ec14 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" #include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" #include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" @@ -37,8 +38,8 @@ public: virtual ~SimpleExecutorDylibManager(); Expected<tpctypes::DylibHandle> open(const std::string &Path, uint64_t Mode); - Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H, - const RemoteSymbolLookupSet &L); + Expected<std::vector<ExecutorSymbolDef>> + lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &L); Error shutdown() override; void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override; diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h index 8784a425d284..42db4c4ea3a5 100644 --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -293,11 +293,14 @@ public: explicit DominatorTreePrinterPass(raw_ostream &OS); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; /// Verifier pass for the \c DominatorTree. struct DominatorTreeVerifierPass : PassInfoMixin<DominatorTreeVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Enables verification of dominator trees. 
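Note on the Dominators and SafepointIRVerifier hunks above: adding a static isRequired() returning true marks a new-PM pass as not skippable by pass instrumentation (for example, it still runs on optnone functions and under bisection). A minimal sketch of the convention; the pass here is hypothetical.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

struct AlwaysRunPrinterPass : PassInfoMixin<AlwaysRunPrinterPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
    // ... print or verify something about F ...
    return PreservedAnalyses::all();
  }
  // Without this, instrumentation may skip the pass entirely.
  static bool isRequired() { return true; }
};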
diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h index f3272327c3f8..1092b636e023 100644 --- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/User.h" @@ -30,7 +31,39 @@ template <typename ItTy = User::const_op_iterator> class generic_gep_type_iterator { ItTy OpIt; - PointerUnion<StructType *, Type *> CurTy; + // We use two different mechanisms to store the type a GEP index applies to. + // In some cases, we need to know the outer aggregate type the index is + // applied within, e.g. a struct. In such cases, we store the aggregate type + // in the iterator, and derive the element type on the fly. + // + // However, this is not always possible, because for the outermost index there + // is no containing type. In such cases, or if the containing type is not + // relevant, e.g. for arrays, the element type is stored as Type* in CurTy. + // + // If CurTy contains a Type* value, this does not imply anything about the + // type itself, because it is the element type and not the outer type. + // In particular, Type* can be a struct type. + // + // Consider this example: + // + // %my.struct = type { i32, [ 4 x float ] } + // [...] + // %gep = getelementptr %my.struct, ptr %ptr, i32 10, i32 1, 32 3 + // + // Iterating over the indices of this GEP, CurTy will contain the following + // values: + // * i32 10: The outer index always operates on the GEP value type. + // CurTy contains a Type* pointing at `%my.struct`. + // * i32 1: This index is within a struct. + // CurTy contains a StructType* pointing at `%my.struct`. + // * i32 3: This index is within an array. We reuse the "flat" indexing + // for arrays which is also used in the top level GEP index. + // CurTy contains a Type* pointing at `float`. + // + // Vectors are handled separately because the layout of vectors is different + // for overaligned elements: Vectors are always bit-packed, whereas arrays + // respect ABI alignment of the elements. + PointerUnion<StructType *, VectorType *, Type *> CurTy; generic_gep_type_iterator() = default; @@ -69,6 +102,8 @@ public: Type *getIndexedType() const { if (auto *T = dyn_cast_if_present<Type *>(CurTy)) return T; + if (auto *VT = dyn_cast_if_present<VectorType *>(CurTy)) + return VT->getElementType(); return cast<StructType *>(CurTy)->getTypeAtIndex(getOperand()); } @@ -79,7 +114,7 @@ public: if (auto *ATy = dyn_cast<ArrayType>(Ty)) CurTy = ATy->getElementType(); else if (auto *VTy = dyn_cast<VectorType>(Ty)) - CurTy = VTy->getElementType(); + CurTy = VTy; else CurTy = dyn_cast<StructType>(Ty); ++OpIt; @@ -108,7 +143,23 @@ public: // that. bool isStruct() const { return isa<StructType *>(CurTy); } - bool isSequential() const { return isa<Type *>(CurTy); } + bool isVector() const { return isa<VectorType *>(CurTy); } + bool isSequential() const { return !isStruct(); } + + // For sequential GEP indices (all except those into structs), the index value + // can be translated into a byte offset by multiplying with an element stride. + // This function returns this stride, which both depends on the element type, + // and the containing aggregate type, as vectors always tightly bit-pack their + // elements. 
+ TypeSize getSequentialElementStride(const DataLayout &DL) const { + assert(isSequential()); + Type *ElemTy = getIndexedType(); + if (isVector()) { + assert(DL.typeSizeEqualsStoreSize(ElemTy) && "Not byte-addressable"); + return DL.getTypeStoreSize(ElemTy); + } + return DL.getTypeAllocSize(ElemTy); + } StructType *getStructType() const { return cast<StructType *>(CurTy); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 531b11123545..e5596258847f 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -837,7 +837,7 @@ class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> { // All dimension-aware intrinsics are derived from this class. class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_, list<IntrinsicProperty> props, - list<SDNodeProperty> sdnodeprops> : DefaultAttrsIntrinsic< + list<SDNodeProperty> sdnodeprops> : Intrinsic< P_.RetTypes, // vdata(VGPR) -- for load/atomic-with-return !listconcat( !foreach(arg, P_.DataArgs, arg.Type), // vdata(VGPR) -- for store/atomic @@ -851,11 +851,12 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_, // gfx12+ imm: bits [0-2] = th, bits [3-4] = scope) // TODO-GFX12: Update all other cachepolicy descriptions. - !listconcat(props, + !listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn], !if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]), !if(P_.IsSample, [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>>], []), [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>>, - ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>]), + ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>>], + !if(P_.IsAtomic, [], [IntrNoSync])), "", sdnodeprops>, diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 736be8ca3212..ea0074d22a44 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -28,6 +28,7 @@ let TargetPrefix = "spv" in { def int_spv_insertelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_any_ty, llvm_anyint_ty]>; def int_spv_const_composite : Intrinsic<[llvm_i32_ty], [llvm_vararg_ty]>; def int_spv_bitcast : Intrinsic<[llvm_any_ty], [llvm_any_ty]>; + def int_spv_ptrcast : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_metadata_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>; def int_spv_switch : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>; def int_spv_cmpxchg : Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_vararg_ty]>; def int_spv_unreachable : Intrinsic<[], []>; diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 48afdb867ba6..447ac0f2aa61 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1270,7 +1270,7 @@ inline DisjointOr_match<LHS, RHS, true> m_c_DisjointOr(const LHS &L, return DisjointOr_match<LHS, RHS, true>(L, R); } -/// Match either "and" or "or disjoint". +/// Match either "add" or "or disjoint". 
template <typename LHS, typename RHS> inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Add>, DisjointOr_match<LHS, RHS>> diff --git a/llvm/include/llvm/IR/SafepointIRVerifier.h b/llvm/include/llvm/IR/SafepointIRVerifier.h index 246d236adb38..2ee998e4c68f 100644 --- a/llvm/include/llvm/IR/SafepointIRVerifier.h +++ b/llvm/include/llvm/IR/SafepointIRVerifier.h @@ -40,6 +40,8 @@ public: explicit SafepointIRVerifierPass() = default; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool isRequired() { return true; } }; } diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 46b1e95c3c15..3db639a68724 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -54,7 +54,7 @@ void initializeAssignmentTrackingAnalysisPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeBasicBlockPathCloningPass(PassRegistry &); -void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &); +void initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &); void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAAWrapperPassPass(PassRegistry&); @@ -75,7 +75,7 @@ void initializeCallGraphDOTPrinterPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry&); void initializeCheckDebugMachineModulePass(PassRegistry &); -void initializeCodeGenPreparePass(PassRegistry&); +void initializeCodeGenPrepareLegacyPassPass(PassRegistry &); void initializeComplexDeinterleavingLegacyPassPass(PassRegistry&); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeCycleInfoWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index be85c4098347..1050f24161fb 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -404,7 +404,9 @@ private: }; // Global mapping from mangled symbol names to resolutions. - StringMap<GlobalResolution> GlobalResolutions; + // Make this an optional to guard against accessing after it has been reset + // (to reduce memory after we're done with it). + std::optional<StringMap<GlobalResolution>> GlobalResolutions; void addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, ArrayRef<SymbolResolution> Res, unsigned Partition, diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 7a21876e565a..fe7fedad18bc 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -113,7 +113,7 @@ namespace { (void) llvm::createTailCallEliminationPass(); (void)llvm::createTLSVariableHoistPass(); (void) llvm::createConstantHoistingPass(); - (void) llvm::createCodeGenPreparePass(); + (void)llvm::createCodeGenPrepareLegacyPass(); (void) llvm::createEarlyCSEPass(); (void) llvm::createGVNPass(); (void) llvm::createPostDomTree(); diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 8931e8cab2fa..01a64fb425a9 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -198,9 +198,9 @@ public: // Defined by linker relaxation targets to possibly emit LEB128 relocations // and set Value at the relocated location. 
- virtual bool relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, - int64_t &Value) const { - return false; + virtual std::pair<bool, bool> + relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, int64_t &Value) const { + return std::make_pair(false, false); } /// @} diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 99477644de4d..86f42654f8af 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -454,6 +454,7 @@ public: uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; Triple::ArchType getArch() const override; + Triple::OSType getOS() const override; Expected<uint64_t> getStartAddress() const override; unsigned getPlatformFlags() const override { return EF.getHeader().e_flags; } @@ -1349,6 +1350,12 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { return Triple::UnknownArch; } + case ELF::EM_CUDA: { + if (EF.getHeader().e_ident[ELF::EI_CLASS] == ELF::ELFCLASS32) + return Triple::nvptx; + return Triple::nvptx64; + } + case ELF::EM_BPF: return IsLittleEndian ? Triple::bpfel : Triple::bpfeb; @@ -1375,6 +1382,35 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { } } +template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const { + switch (EF.getHeader().e_ident[ELF::EI_OSABI]) { + case ELF::ELFOSABI_NETBSD: + return Triple::NetBSD; + case ELF::ELFOSABI_LINUX: + return Triple::Linux; + case ELF::ELFOSABI_HURD: + return Triple::Hurd; + case ELF::ELFOSABI_SOLARIS: + return Triple::Solaris; + case ELF::ELFOSABI_AIX: + return Triple::AIX; + case ELF::ELFOSABI_FREEBSD: + return Triple::FreeBSD; + case ELF::ELFOSABI_OPENBSD: + return Triple::OpenBSD; + case ELF::ELFOSABI_CUDA: + return Triple::CUDA; + case ELF::ELFOSABI_AMDGPU_HSA: + return Triple::AMDHSA; + case ELF::ELFOSABI_AMDGPU_PAL: + return Triple::AMDPAL; + case ELF::ELFOSABI_AMDGPU_MESA3D: + return Triple::Mesa3D; + default: + return Triple::UnknownOS; + } +} + template <class ELFT> Expected<uint64_t> ELFObjectFile<ELFT>::getStartAddress() const { return EF.getHeader().e_entry; diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index d3351a2d1650..956f7811dd6c 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -885,6 +885,9 @@ struct PGOAnalysisMap { bool BBFreq : 1; bool BrProb : 1; + // True if at least one feature is enabled + bool anyEnabled() const { return FuncEntryCount || BBFreq || BrProb; } + // Encodes to minimum bit width representation. 
uint8_t encode() const { return (static_cast<uint8_t>(FuncEntryCount) << 0) | diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index c254fc2ccfde..8c868c7643ed 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -337,6 +337,7 @@ public: virtual StringRef getFileFormatName() const = 0; virtual Triple::ArchType getArch() const = 0; + virtual Triple::OSType getOS() const { return Triple::UnknownOS; } virtual Expected<SubtargetFeatures> getFeatures() const = 0; virtual std::optional<StringRef> tryGetCPUName() const { return std::nullopt; diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 2ec36cad244f..8c6a44876d54 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -65,7 +65,7 @@ private: bool shouldPrintBeforePass(StringRef PassID); bool shouldPrintAfterPass(StringRef PassID); bool shouldPrintPassNumbers(); - bool shouldPrintAtPassNumber(); + bool shouldPrintBeforePassNumber(); void pushPassRunDescriptor(StringRef PassID, Any IR, std::string &DumpIRFilename); diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h index 09c4edd6df60..c539448683d3 100644 --- a/llvm/include/llvm/Support/RISCVISAInfo.h +++ b/llvm/include/llvm/Support/RISCVISAInfo.h @@ -68,9 +68,8 @@ public: parseFeatures(unsigned XLen, const std::vector<std::string> &Features); /// Convert RISC-V ISA info to a feature vector. - void toFeatures(std::vector<StringRef> &Features, - llvm::function_ref<StringRef(const Twine &)> StrAlloc, - bool AddAllExtensions) const; + std::vector<std::string> toFeatures(bool AddAllExtensions = false, + bool IgnoreUnknown = true) const; const OrderedExtensionMap &getExtensions() const { return Exts; }; @@ -83,7 +82,6 @@ public: bool hasExtension(StringRef Ext) const; std::string toString() const; - std::vector<std::string> toFeatureVector() const; StringRef computeDefaultABI() const; static bool isSupportedExtensionFeature(StringRef Ext); diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index f28c1edc3d95..5e704f0b9a75 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -244,6 +244,7 @@ def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>; def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>; def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>; def : GINodeEquiv<G_FENCE, atomic_fence>; +def : GINodeEquiv<G_PREFETCH, prefetch>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. // Should be used on defs that subclass GIComplexOperandMatcher<>. diff --git a/llvm/include/llvm/Target/TargetInstrPredicate.td b/llvm/include/llvm/Target/TargetInstrPredicate.td index 9f2cde9d9230..82c4c7b23a49 100644 --- a/llvm/include/llvm/Target/TargetInstrPredicate.td +++ b/llvm/include/llvm/Target/TargetInstrPredicate.td @@ -95,6 +95,12 @@ class MCOperandPredicate<int Index> : MCInstPredicate { // Return true if machine operand at position `Index` is a register operand. class CheckIsRegOperand<int Index> : MCOperandPredicate<Index>; +// Return true if machine operand at position `Index` is a virtual register operand. 
+class CheckIsVRegOperand<int Index> : MCOperandPredicate<Index>; + +// Return true if machine operand at position `Index` is not a virtual register operand. +class CheckIsNotVRegOperand<int Index> : CheckNot<CheckIsVRegOperand<Index>>; + // Return true if machine operand at position `Index` is an immediate operand. class CheckIsImmOperand<int Index> : MCOperandPredicate<Index>; diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index 4c29f25bedf4..1fe47dec70b1 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -362,7 +362,9 @@ public: virtual TargetTransformInfo getTargetTransformInfo(const Function &F) const; /// Allow the target to modify the pass pipeline. - virtual void registerPassBuilderCallbacks(PassBuilder &) {} + // TODO: Populate all pass names by using <Target>PassRegistry.def. + virtual void registerPassBuilderCallbacks(PassBuilder &, + bool PopulateClassToPassNames) {} /// Allow the target to register alias analyses with the AAManager for use /// with the new pass manager. Only affects the "default" AAManager. diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td index 949baa5d2105..2016d452afb6 100644 --- a/llvm/include/llvm/Target/TargetSchedule.td +++ b/llvm/include/llvm/Target/TargetSchedule.td @@ -584,3 +584,116 @@ class MemoryQueue<ProcResourceKind PR> { class LoadQueue<ProcResourceKind LDQueue> : MemoryQueue<LDQueue>; class StoreQueue<ProcResourceKind STQueue> : MemoryQueue<STQueue>; + +// The target instruction that FusionPredicate will be evaluated on. +class FusionTarget; +def first_fusion_target : FusionTarget; +def second_fusion_target : FusionTarget; +def both_fusion_target : FusionTarget; + +// Base class of FusionPredicate, etc. The avaliable variables are: +// * const TargetInstrInfo &TII +// * const TargetSubtargetInfo &STI +// * const MachineRegisterInfo &MRI +// * const MachineInstr *FirstMI +// * const MachineInstr &SecondMI +class FusionPredicate<FusionTarget target> { + FusionTarget Target = target; +} +class FirstFusionPredicate: FusionPredicate<first_fusion_target>; +class SecondFusionPredicate: FusionPredicate<second_fusion_target>; +class BothFusionPredicate: FusionPredicate<both_fusion_target>; + +// FusionPredicate with raw code predicate. +class FusionPredicateWithCode<code pred> : FusionPredicate<both_fusion_target> { + code Predicate = pred; +} + +// FusionPredicate with MCInstPredicate. +class FusionPredicateWithMCInstPredicate<FusionTarget target, MCInstPredicate pred> + : FusionPredicate<target> { + MCInstPredicate Predicate = pred; +} +class FirstFusionPredicateWithMCInstPredicate<MCInstPredicate pred> + : FusionPredicateWithMCInstPredicate<first_fusion_target, pred>; +class SecondFusionPredicateWithMCInstPredicate<MCInstPredicate pred> + : FusionPredicateWithMCInstPredicate<second_fusion_target, pred>; +// The pred will be applied on both firstMI and secondMI. +class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred> + : FusionPredicateWithMCInstPredicate<second_fusion_target, pred>; + +// Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position +// `firstOpIdx` should be the same as the operand of `SecondMI` at position +// `secondOpIdx`. +class TieReg<int firstOpIdx, int secondOpIdx> : BothFusionPredicate { + int FirstOpIdx = firstOpIdx; + int SecondOpIdx = secondOpIdx; +} + +// A predicate for wildcard. 
The generated code will be like: +// ``` +// if (!FirstMI) +// return ReturnValue; +// ``` +class WildcardPred<bit ret> : FirstFusionPredicate { + bit ReturnValue = ret; +} +def WildcardFalse : WildcardPred<0>; +def WildcardTrue : WildcardPred<1>; + +// Indicates that the destination register of `FirstMI` should have one use if +// it is a virtual register. +class OneUsePred : FirstFusionPredicate; +def OneUse : OneUsePred; + +// Handled by MacroFusionPredicatorEmitter backend. +// The generated predicator will be like: +// ``` +// bool isNAME(const TargetInstrInfo &TII, +// const TargetSubtargetInfo &STI, +// const MachineInstr *FirstMI, +// const MachineInstr &SecondMI) { +// auto &MRI = SecondMI.getMF()->getRegInfo(); +// /* Predicates */ +// return true; +// } +// ``` +class Fusion<list<FusionPredicate> predicates> { + list<FusionPredicate> Predicates = predicates; +} + +// The generated predicator will be like: +// ``` +// bool isNAME(const TargetInstrInfo &TII, +// const TargetSubtargetInfo &STI, +// const MachineInstr *FirstMI, +// const MachineInstr &SecondMI) { +// auto &MRI = SecondMI.getMF()->getRegInfo(); +// /* Prolog */ +// /* Predicate for `SecondMI` */ +// /* Wildcard */ +// /* Predicate for `FirstMI` */ +// /* Check One Use */ +// /* Tie registers */ +// /* Epilog */ +// return true; +// } +// ``` +class SimpleFusion<MCInstPredicate firstPred, MCInstPredicate secondPred, + list<FusionPredicate> prolog = [], + list<FusionPredicate> epilog = []> + : Fusion<!listconcat( + prolog, + [ + SecondFusionPredicateWithMCInstPredicate<secondPred>, + WildcardTrue, + FirstFusionPredicateWithMCInstPredicate<firstPred>, + SecondFusionPredicateWithMCInstPredicate< + CheckAny<[ + CheckIsVRegOperand<0>, + CheckSameRegOperand<0, 1> + ]>>, + OneUse, + TieReg<0, 1>, + ], + epilog)>; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index d4fc9d8a96db..22360353790d 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -614,6 +614,12 @@ def strict_sint_to_fp : SDNode<"ISD::STRICT_SINT_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; def strict_uint_to_fp : SDNode<"ISD::STRICT_UINT_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; + +def strict_f16_to_fp : SDNode<"ISD::STRICT_FP16_TO_FP", + SDTIntToFPOp, [SDNPHasChain]>; +def strict_fp_to_f16 : SDNode<"ISD::STRICT_FP_TO_FP16", + SDTFPToIntOp, [SDNPHasChain]>; + def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>; def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>; @@ -1576,6 +1582,13 @@ def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred), [(strict_fsetccs node:$lhs, node:$rhs, node:$pred), (setcc node:$lhs, node:$rhs, node:$pred)]>; +def any_f16_to_fp : PatFrags<(ops node:$src), + [(f16_to_fp node:$src), + (strict_f16_to_fp node:$src)]>; +def any_fp_to_f16 : PatFrags<(ops node:$src), + [(fp_to_f16 node:$src), + (strict_fp_to_f16 node:$src)]>; + multiclass binary_atomic_op_ord { def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> { diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 53dc2be825f2..2fe4d5eeb742 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -482,8 +482,7 @@ inline constexpr CpuInfo CpuInfos[] = { (AArch64::ExtensionBitset( {AArch64::AEK_AES, AArch64::AEK_SHA2, 
AArch64::AEK_FP16, AArch64::AEK_DOTPROD, AArch64::AEK_RCPC, AArch64::AEK_SSBS, - AArch64::AEK_PROFILE, AArch64::AEK_FLAGM, AArch64::AEK_PAUTH, - AArch64::AEK_FP16FML}))}, + AArch64::AEK_PROFILE, AArch64::AEK_FLAGM, AArch64::AEK_PAUTH}))}, {"cortex-a710", ARMV9A, (AArch64::ExtensionBitset( {AArch64::AEK_MTE, AArch64::AEK_PAUTH, AArch64::AEK_FLAGM, @@ -514,13 +513,13 @@ inline constexpr CpuInfo CpuInfos[] = { (AArch64::ExtensionBitset( {AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_FP16, AArch64::AEK_DOTPROD, AArch64::AEK_RCPC, AArch64::AEK_SSBS, - AArch64::AEK_PAUTH, AArch64::AEK_PROFILE}))}, + AArch64::AEK_PAUTH, AArch64::AEK_PROFILE, AArch64::AEK_FLAGM}))}, {"cortex-x2", ARMV9A, (AArch64::ExtensionBitset( {AArch64::AEK_MTE, AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_PAUTH, AArch64::AEK_SSBS, AArch64::AEK_SB, AArch64::AEK_SVE, AArch64::AEK_SVE2, AArch64::AEK_SVE2BITPERM, - AArch64::AEK_FP16FML}))}, + AArch64::AEK_FP16FML, AArch64::AEK_FLAGM}))}, {"cortex-x3", ARMV9A, (AArch64::ExtensionBitset( {AArch64::AEK_SVE, AArch64::AEK_PERFMON, AArch64::AEK_PROFILE, diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 47904621c096..95014a546f72 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -434,6 +434,12 @@ public: /// string (separated by a '-' if the environment component is present). StringRef getOSAndEnvironmentName() const; + /// Get the version component of the environment component as a single + /// string (the version after the environment). + /// + /// For example, "fooos1.2.3" would return "1.2.3". + StringRef getEnvironmentVersionString() const; + /// @} /// @name Convenience Predicates /// @{ diff --git a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h index 4136c45e1905..6bc01ececcf3 100644 --- a/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h +++ b/llvm/include/llvm/Transforms/Scalar/IVUsersPrinter.h @@ -25,6 +25,7 @@ public: explicit IVUsersPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + static bool isRequired() { return true; } }; } diff --git a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h index 4d1f934ae91d..f445e0696b5f 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h @@ -24,6 +24,7 @@ class LoopAccessInfoPrinterPass public: explicit LoopAccessInfoPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // End llvm namespace diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index b433d2ec89dc..365b215c051f 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -215,11 +215,13 @@ class PredicateInfoPrinterPass public: explicit PredicateInfoPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; /// Verifier pass for \c PredicateInfo. 
struct PredicateInfoVerifierPass : PassInfoMixin<PredicateInfoVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp index fb3a6f8de2d6..1b7277df0e0c 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -77,9 +78,15 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI, }; for (unsigned Idx = 0; Idx != CI->getNumOperandBundles(); Idx++) { - if (CI->getOperandBundleAt(Idx).Inputs.size() > ABA_WasOn && - CI->getOperandBundleAt(Idx).getTagName() != IgnoreBundleTag) - AddAffected(CI->getOperandBundleAt(Idx).Inputs[ABA_WasOn], Idx); + OperandBundleUse Bundle = CI->getOperandBundleAt(Idx); + if (Bundle.getTagName() == "separate_storage") { + assert(Bundle.Inputs.size() == 2 && + "separate_storage must have two args"); + AddAffected(getUnderlyingObject(Bundle.Inputs[0]), Idx); + AddAffected(getUnderlyingObject(Bundle.Inputs[1]), Idx); + } else if (Bundle.Inputs.size() > ABA_WasOn && + Bundle.getTagName() != IgnoreBundleTag) + AddAffected(Bundle.Inputs[ABA_WasOn], Idx); } Value *Cond = CI->getArgOperand(0), *A, *B; diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3de147368f23..a4a0846df7af 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -69,7 +69,7 @@ static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden, cl::init(true)); static cl::opt<bool> EnableSeparateStorageAnalysis("basic-aa-separate-storage", - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); /// SearchLimitReached / SearchTimes shows how often the limit of /// to decompose GEPs is reached. It will affect the precision @@ -639,7 +639,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, continue; // Don't attempt to analyze GEPs if the scalable index is not zero. 
- TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); + TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL); if (AllocTypeSize.isScalable()) { Decomposed.Base = V; return Decomposed; @@ -650,7 +650,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, continue; } - TypeSize AllocTypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); + TypeSize AllocTypeSize = GTI.getSequentialElementStride(DL); if (AllocTypeSize.isScalable()) { Decomposed.Base = V; return Decomposed; @@ -1543,28 +1543,45 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, TLI, NullIsValidLocation))) return AliasResult::NoAlias; - if (CtxI && EnableSeparateStorageAnalysis) { - for (auto &AssumeVH : AC.assumptions()) { - if (!AssumeVH) + if (EnableSeparateStorageAnalysis) { + for (AssumptionCache::ResultElem &Elem : AC.assumptionsFor(O1)) { + if (!Elem || Elem.Index == AssumptionCache::ExprResultIdx) continue; - AssumeInst *Assume = cast<AssumeInst>(AssumeVH); - - for (unsigned Idx = 0; Idx < Assume->getNumOperandBundles(); Idx++) { - OperandBundleUse OBU = Assume->getOperandBundleAt(Idx); - if (OBU.getTagName() == "separate_storage") { - assert(OBU.Inputs.size() == 2); - const Value *Hint1 = OBU.Inputs[0].get(); - const Value *Hint2 = OBU.Inputs[1].get(); - // This is often a no-op; instcombine rewrites this for us. No-op - // getUnderlyingObject calls are fast, though. - const Value *HintO1 = getUnderlyingObject(Hint1); - const Value *HintO2 = getUnderlyingObject(Hint2); - - if (((O1 == HintO1 && O2 == HintO2) || - (O1 == HintO2 && O2 == HintO1)) && - isValidAssumeForContext(Assume, CtxI, DT)) + AssumeInst *Assume = cast<AssumeInst>(Elem); + OperandBundleUse OBU = Assume->getOperandBundleAt(Elem.Index); + if (OBU.getTagName() == "separate_storage") { + assert(OBU.Inputs.size() == 2); + const Value *Hint1 = OBU.Inputs[0].get(); + const Value *Hint2 = OBU.Inputs[1].get(); + // This is often a no-op; instcombine rewrites this for us. No-op + // getUnderlyingObject calls are fast, though. + const Value *HintO1 = getUnderlyingObject(Hint1); + const Value *HintO2 = getUnderlyingObject(Hint2); + + auto ValidAssumeForPtrContext = [&](const Value *Ptr) { + if (const Instruction *PtrI = dyn_cast<Instruction>(Ptr)) { + return isValidAssumeForContext(Assume, PtrI, DT, + /* AllowEphemerals */ true); + } + if (const Argument *PtrA = dyn_cast<Argument>(Ptr)) { + const Instruction *FirstI = + &*PtrA->getParent()->getEntryBlock().begin(); + return isValidAssumeForContext(Assume, FirstI, DT, + /* AllowEphemerals */ true); + } + return false; + }; + + if ((O1 == HintO1 && O2 == HintO2) || (O1 == HintO2 && O2 == HintO1)) { + // Note that we go back to V1 and V2 for the + // ValidAssumeForPtrContext checks; they're dominated by O1 and O2, + // so strictly more assumptions are valid for them. 
+ if ((CtxI && isValidAssumeForContext(Assume, CtxI, DT, + /* AllowEphemerals */ true)) || + ValidAssumeForPtrContext(V1) || ValidAssumeForPtrContext(V2)) { return AliasResult::NoAlias; + } } } } diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp index 35bdd869a88d..1a9c7c21e9ce 100644 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -95,14 +95,14 @@ bool ConstraintSystem::eliminateUsingFM() { IdxUpper++; } - if (MulOverflow(UpperV, ((-1) * LowerLast), M1)) + if (MulOverflow(UpperV, -1 * LowerLast, M1)) return false; if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) { LowerV = LowerRow[IdxLower].Coefficient; IdxLower++; } - if (MulOverflow(LowerV, (UpperLast), M2)) + if (MulOverflow(LowerV, UpperLast, M2)) return false; if (AddOverflow(M1, M2, N)) return false; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 7096e06d925a..1fa7badaa4fa 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1429,7 +1429,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { continue; } - APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType())); + APInt TypeSize(IntPtrWidth, GTI.getSequentialElementStride(DL)); Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; } return true; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 78a833476334..d0c27cae0dff 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2204,6 +2204,13 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y)))) return Constant::getNullValue(Op0->getType()); + const APInt *C1; + Value *A; + // (A ^ C) & (A ^ ~C) -> 0 + if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) && + match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1)))) + return Constant::getNullValue(Op0->getType()); + if (Op0->getType()->isIntOrIntVectorTy(1)) { if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL)) { // If Op0 is true implies Op1 is true, then Op0 is a subset of Op1. @@ -2473,6 +2480,11 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = threadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; + // (A ^ C) | (A ^ ~C) -> -1, i.e. all bits set to one. + if (match(Op0, m_Xor(m_Value(A), m_APInt(C1))) && + match(Op1, m_Xor(m_Specific(A), m_SpecificInt(~*C1)))) + return Constant::getAllOnesValue(Op0->getType()); + if (Op0->getType()->isIntOrIntVectorTy(1)) { if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, false)) { @@ -4301,7 +4313,7 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // For vector types, the simplification must hold per-lane, so forbid // potentially cross-lane operations like shufflevector. if (!I->getType()->isVectorTy() || isa<ShuffleVectorInst>(I) || - isa<CallBase>(I)) + isa<CallBase>(I) || isa<BitCastInst>(I)) return nullptr; } diff --git a/llvm/lib/Analysis/Local.cpp b/llvm/lib/Analysis/Local.cpp index 30757abeb098..f5e080d2c78e 100644 --- a/llvm/lib/Analysis/Local.cpp +++ b/llvm/lib/Analysis/Local.cpp @@ -64,7 +64,7 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, // Convert to correct type. 
if (Op->getType() != IntIdxTy) Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c"); - TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType()); + TypeSize TSize = GTI.getSequentialElementStride(DL); if (TSize != TypeSize::getFixed(1)) { Value *Scale = Builder->CreateTypeSize(IntIdxTy->getScalarType(), TSize); if (IntIdxTy->isVectorTy()) diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 89666018d925..aed60cc5a3f5 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2703,7 +2703,10 @@ static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) { // If it's a type with the same allocation size as the result of the GEP we // can peel off the zero index. - if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize) + TypeSize ElemSize = GEPTI.isStruct() + ? DL.getTypeAllocSize(GEPTI.getIndexedType()) + : GEPTI.getSequentialElementStride(DL); + if (ElemSize != GEPAllocSize) break; --LastOperand; } diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 87ddfe3e92ae..59c96a3371e8 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -969,7 +969,9 @@ LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses LoopPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - AM.getResult<LoopAnalysis>(F).print(OS); + auto &LI = AM.getResult<LoopAnalysis>(F); + OS << "Loop info for function '" << F.getName() << "':\n"; + LI.print(OS); return PreservedAnalyses::all(); } diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index 9e6811f3bf88..46a7a921d86d 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -577,10 +577,12 @@ Value *llvm::getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI) { //===----------------------------------------------------------------------===// // Utility functions to compute size of objects. // -static APInt getSizeWithOverflow(const SizeOffsetType &Data) { - if (Data.second.isNegative() || Data.first.ult(Data.second)) - return APInt(Data.first.getBitWidth(), 0); - return Data.first - Data.second; +static APInt getSizeWithOverflow(const SizeOffsetAPInt &Data) { + APInt Size = Data.Size; + APInt Offset = Data.Offset; + if (Offset.isNegative() || Size.ult(Offset)) + return APInt(Size.getBitWidth(), 0); + return Size - Offset; } /// Compute the size of the object pointed by Ptr. 
Returns true and the @@ -590,8 +592,8 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) { bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) { ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts); - SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); - if (!Visitor.bothKnown(Data)) + SizeOffsetAPInt Data = Visitor.compute(const_cast<Value *>(Ptr)); + if (!Data.bothKnown()) return false; Size = getSizeWithOverflow(Data).getZExtValue(); @@ -640,8 +642,7 @@ Value *llvm::lowerObjectSizeCall( } else { LLVMContext &Ctx = ObjectSize->getFunction()->getContext(); ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, EvalOptions); - SizeOffsetEvalType SizeOffsetPair = - Eval.compute(ObjectSize->getArgOperand(0)); + SizeOffsetValue SizeOffsetPair = Eval.compute(ObjectSize->getArgOperand(0)); if (SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown()) { IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder( @@ -651,19 +652,19 @@ Value *llvm::lowerObjectSizeCall( })); Builder.SetInsertPoint(ObjectSize); + Value *Size = SizeOffsetPair.Size; + Value *Offset = SizeOffsetPair.Offset; + // If we've outside the end of the object, then we can always access // exactly 0 bytes. - Value *ResultSize = - Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second); - Value *UseZero = - Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second); + Value *ResultSize = Builder.CreateSub(Size, Offset); + Value *UseZero = Builder.CreateICmpULT(Size, Offset); ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType); Value *Ret = Builder.CreateSelect( UseZero, ConstantInt::get(ResultType, 0), ResultSize); // The non-constant size expression cannot evaluate to -1. - if (!isa<Constant>(SizeOffsetPair.first) || - !isa<Constant>(SizeOffsetPair.second)) + if (!isa<Constant>(Size) || !isa<Constant>(Offset)) Builder.CreateAssumption( Builder.CreateICmpNE(Ret, ConstantInt::get(ResultType, -1))); @@ -697,12 +698,12 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL, // a different address space. } -SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::compute(Value *V) { InstructionsVisited = 0; return computeImpl(V); } -SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::computeImpl(Value *V) { unsigned InitialIntTyBits = DL.getIndexTypeSizeInBits(V->getType()); // Stripping pointer casts can strip address space casts which can change the @@ -719,7 +720,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) { IntTyBits = DL.getIndexTypeSizeInBits(V->getType()); Zero = APInt::getZero(IntTyBits); - SizeOffsetType SOT = computeValue(V); + SizeOffsetAPInt SOT = computeValue(V); bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits; if (!IndexTypeSizeChanged && Offset.isZero()) @@ -729,27 +730,28 @@ SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) { // accumulated some constant offset (or both). Readjust the bit width to match // the argument index type size and apply the offset, as required. 
if (IndexTypeSizeChanged) { - if (knownSize(SOT) && !::CheckedZextOrTrunc(SOT.first, InitialIntTyBits)) - SOT.first = APInt(); - if (knownOffset(SOT) && !::CheckedZextOrTrunc(SOT.second, InitialIntTyBits)) - SOT.second = APInt(); + if (SOT.knownSize() && !::CheckedZextOrTrunc(SOT.Size, InitialIntTyBits)) + SOT.Size = APInt(); + if (SOT.knownOffset() && + !::CheckedZextOrTrunc(SOT.Offset, InitialIntTyBits)) + SOT.Offset = APInt(); } // If the computed offset is "unknown" we cannot add the stripped offset. - return {SOT.first, - SOT.second.getBitWidth() > 1 ? SOT.second + Offset : SOT.second}; + return {SOT.Size, + SOT.Offset.getBitWidth() > 1 ? SOT.Offset + Offset : SOT.Offset}; } -SizeOffsetType ObjectSizeOffsetVisitor::computeValue(Value *V) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::computeValue(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) { // If we have already seen this instruction, bail out. Cycles can happen in // unreachable code after constant propagation. - auto P = SeenInsts.try_emplace(I, unknown()); + auto P = SeenInsts.try_emplace(I, ObjectSizeOffsetVisitor::unknown()); if (!P.second) return P.first->second; ++InstructionsVisited; if (InstructionsVisited > ObjectSizeOffsetVisitorMaxVisitInstructions) - return unknown(); - SizeOffsetType Res = visit(*I); + return ObjectSizeOffsetVisitor::unknown(); + SizeOffsetAPInt Res = visit(*I); // Cache the result for later visits. If we happened to visit this during // the above recursion, we would consider it unknown until now. SeenInsts[I] = Res; @@ -768,55 +770,55 @@ SizeOffsetType ObjectSizeOffsetVisitor::computeValue(Value *V) { LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V << '\n'); - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); } bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) { return ::CheckedZextOrTrunc(I, IntTyBits); } -SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { TypeSize ElemSize = DL.getTypeAllocSize(I.getAllocatedType()); if (ElemSize.isScalable() && Options.EvalMode != ObjectSizeOpts::Mode::Min) - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); APInt Size(IntTyBits, ElemSize.getKnownMinValue()); if (!I.isArrayAllocation()) - return std::make_pair(align(Size, I.getAlign()), Zero); + return SizeOffsetAPInt(align(Size, I.getAlign()), Zero); Value *ArraySize = I.getArraySize(); if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) { APInt NumElems = C->getValue(); if (!CheckedZextOrTrunc(NumElems)) - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); bool Overflow; Size = Size.umul_ov(NumElems, Overflow); - return Overflow ? unknown() - : std::make_pair(align(Size, I.getAlign()), Zero); + return Overflow ? ObjectSizeOffsetVisitor::unknown() + : SizeOffsetAPInt(align(Size, I.getAlign()), Zero); } - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitArgument(Argument &A) { Type *MemoryTy = A.getPointeeInMemoryValueType(); // No interprocedural analysis is done at the moment. 
if (!MemoryTy|| !MemoryTy->isSized()) { ++ObjectVisitorArgument; - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); } APInt Size(IntTyBits, DL.getTypeAllocSize(MemoryTy)); - return std::make_pair(align(Size, A.getParamAlign()), Zero); + return SizeOffsetAPInt(align(Size, A.getParamAlign()), Zero); } -SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) { if (std::optional<APInt> Size = getAllocSize(&CB, TLI)) - return std::make_pair(*Size, Zero); - return unknown(); + return SizeOffsetAPInt(*Size, Zero); + return ObjectSizeOffsetVisitor::unknown(); } -SizeOffsetType -ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) { +SizeOffsetAPInt +ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull &CPN) { // If null is unknown, there's nothing we can do. Additionally, non-zero // address spaces can make use of null, so we don't presume to know anything // about that. @@ -825,45 +827,46 @@ ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) { // them on the floor, but it's unclear what we should do when a NULL from // addrspace(1) gets casted to addrspace(0) (or vice-versa). if (Options.NullIsUnknownSize || CPN.getType()->getAddressSpace()) - return unknown(); - return std::make_pair(Zero, Zero); + return ObjectSizeOffsetVisitor::unknown(); + return SizeOffsetAPInt(Zero, Zero); } -SizeOffsetType -ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst&) { - return unknown(); +SizeOffsetAPInt +ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst &) { + return ObjectSizeOffsetVisitor::unknown(); } -SizeOffsetType -ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { +SizeOffsetAPInt +ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst &) { // Easy cases were already folded by previous passes. 
- return unknown(); + return ObjectSizeOffsetVisitor::unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { if (GA.isInterposable()) - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); return computeImpl(GA.getAliasee()); } -SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ +SizeOffsetAPInt +ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV) { if (!GV.getValueType()->isSized() || GV.hasExternalWeakLinkage() || ((!GV.hasInitializer() || GV.isInterposable()) && Options.EvalMode != ObjectSizeOpts::Mode::Min)) - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType())); - return std::make_pair(align(Size, GV.getAlign()), Zero); + return SizeOffsetAPInt(align(Size, GV.getAlign()), Zero); } -SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst &) { // clueless - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset( +SizeOffsetAPInt ObjectSizeOffsetVisitor::findLoadSizeOffset( LoadInst &Load, BasicBlock &BB, BasicBlock::iterator From, - SmallDenseMap<BasicBlock *, SizeOffsetType, 8> &VisitedBlocks, + SmallDenseMap<BasicBlock *, SizeOffsetAPInt, 8> &VisitedBlocks, unsigned &ScannedInstCount) { constexpr unsigned MaxInstsToScan = 128; @@ -871,10 +874,10 @@ SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset( if (Where != VisitedBlocks.end()) return Where->second; - auto Unknown = [this, &BB, &VisitedBlocks]() { - return VisitedBlocks[&BB] = unknown(); + auto Unknown = [&BB, &VisitedBlocks]() { + return VisitedBlocks[&BB] = ObjectSizeOffsetVisitor::unknown(); }; - auto Known = [&BB, &VisitedBlocks](SizeOffsetType SO) { + auto Known = [&BB, &VisitedBlocks](SizeOffsetAPInt SO) { return VisitedBlocks[&BB] = SO; }; @@ -951,46 +954,47 @@ SizeOffsetType ObjectSizeOffsetVisitor::findLoadSizeOffset( return Unknown(); } while (From-- != BB.begin()); - SmallVector<SizeOffsetType> PredecessorSizeOffsets; + SmallVector<SizeOffsetAPInt> PredecessorSizeOffsets; for (auto *PredBB : predecessors(&BB)) { PredecessorSizeOffsets.push_back(findLoadSizeOffset( Load, *PredBB, BasicBlock::iterator(PredBB->getTerminator()), VisitedBlocks, ScannedInstCount)); - if (!bothKnown(PredecessorSizeOffsets.back())) + if (!PredecessorSizeOffsets.back().bothKnown()) return Unknown(); } if (PredecessorSizeOffsets.empty()) return Unknown(); - return Known(std::accumulate(PredecessorSizeOffsets.begin() + 1, - PredecessorSizeOffsets.end(), - PredecessorSizeOffsets.front(), - [this](SizeOffsetType LHS, SizeOffsetType RHS) { - return combineSizeOffset(LHS, RHS); - })); + return Known(std::accumulate( + PredecessorSizeOffsets.begin() + 1, PredecessorSizeOffsets.end(), + PredecessorSizeOffsets.front(), + [this](SizeOffsetAPInt LHS, SizeOffsetAPInt RHS) { + return combineSizeOffset(LHS, RHS); + })); } -SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst &LI) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitLoadInst(LoadInst &LI) { if (!Options.AA) { ++ObjectVisitorLoad; - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); } - SmallDenseMap<BasicBlock *, SizeOffsetType, 8> VisitedBlocks; + SmallDenseMap<BasicBlock *, SizeOffsetAPInt, 8> VisitedBlocks; unsigned ScannedInstCount = 0; - SizeOffsetType SO = + 
SizeOffsetAPInt SO = findLoadSizeOffset(LI, *LI.getParent(), BasicBlock::iterator(LI), VisitedBlocks, ScannedInstCount); - if (!bothKnown(SO)) + if (!SO.bothKnown()) ++ObjectVisitorLoad; return SO; } -SizeOffsetType ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetType LHS, - SizeOffsetType RHS) { - if (!bothKnown(LHS) || !bothKnown(RHS)) - return unknown(); +SizeOffsetAPInt +ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetAPInt LHS, + SizeOffsetAPInt RHS) { + if (!LHS.bothKnown() || !RHS.bothKnown()) + return ObjectSizeOffsetVisitor::unknown(); switch (Options.EvalMode) { case ObjectSizeOpts::Mode::Min: @@ -998,40 +1002,45 @@ SizeOffsetType ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetType LHS, case ObjectSizeOpts::Mode::Max: return (getSizeWithOverflow(LHS).sgt(getSizeWithOverflow(RHS))) ? LHS : RHS; case ObjectSizeOpts::Mode::ExactSizeFromOffset: - return (getSizeWithOverflow(LHS).eq(getSizeWithOverflow(RHS))) ? LHS - : unknown(); + return (getSizeWithOverflow(LHS).eq(getSizeWithOverflow(RHS))) + ? LHS + : ObjectSizeOffsetVisitor::unknown(); case ObjectSizeOpts::Mode::ExactUnderlyingSizeAndOffset: - return LHS == RHS ? LHS : unknown(); + return LHS == RHS ? LHS : ObjectSizeOffsetVisitor::unknown(); } llvm_unreachable("missing an eval mode"); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) { if (PN.getNumIncomingValues() == 0) - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); auto IncomingValues = PN.incoming_values(); return std::accumulate(IncomingValues.begin() + 1, IncomingValues.end(), computeImpl(*IncomingValues.begin()), - [this](SizeOffsetType LHS, Value *VRHS) { + [this](SizeOffsetAPInt LHS, Value *VRHS) { return combineSizeOffset(LHS, computeImpl(VRHS)); }); } -SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { return combineSizeOffset(computeImpl(I.getTrueValue()), computeImpl(I.getFalseValue())); } -SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) { - return std::make_pair(Zero, Zero); +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitUndefValue(UndefValue &) { + return SizeOffsetAPInt(Zero, Zero); } -SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { +SizeOffsetAPInt ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n'); - return unknown(); + return ObjectSizeOffsetVisitor::unknown(); } +// Just set these right here... +SizeOffsetValue::SizeOffsetValue(const SizeOffsetWeakTrackingVH &SOT) + : SizeOffsetType(SOT.Size, SOT.Offset) {} + ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator( const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, ObjectSizeOpts EvalOpts) @@ -1044,21 +1053,21 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator( // be different for later objects. } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { +SizeOffsetValue ObjectSizeOffsetEvaluator::compute(Value *V) { // XXX - Are vectors of pointers possible here? IntTy = cast<IntegerType>(DL.getIndexType(V->getType())); Zero = ConstantInt::get(IntTy, 0); - SizeOffsetEvalType Result = compute_(V); + SizeOffsetValue Result = compute_(V); - if (!bothKnown(Result)) { + if (!Result.bothKnown()) { // Erase everything that was computed in this iteration from the cache, so // that no dangling references are left behind. 
We could be a bit smarter if // we kept a dependency graph. It's probably not worth the complexity. for (const Value *SeenVal : SeenVals) { CacheMapTy::iterator CacheIt = CacheMap.find(SeenVal); // non-computable results can be safely cached - if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second)) + if (CacheIt != CacheMap.end() && CacheIt->second.anyKnown()) CacheMap.erase(CacheIt); } @@ -1074,12 +1083,12 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { return Result; } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { +SizeOffsetValue ObjectSizeOffsetEvaluator::compute_(Value *V) { ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, EvalOpts); - SizeOffsetType Const = Visitor.compute(V); - if (Visitor.bothKnown(Const)) - return std::make_pair(ConstantInt::get(Context, Const.first), - ConstantInt::get(Context, Const.second)); + SizeOffsetAPInt Const = Visitor.compute(V); + if (Const.bothKnown()) + return SizeOffsetValue(ConstantInt::get(Context, Const.Size), + ConstantInt::get(Context, Const.Offset)); V = V->stripPointerCasts(); @@ -1095,13 +1104,13 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { Builder.SetInsertPoint(I); // Now compute the size and offset. - SizeOffsetEvalType Result; + SizeOffsetValue Result; // Record the pointers that were handled in this run, so that they can be // cleaned later if something fails. We also use this set to break cycles that // can occur in dead code. if (!SeenVals.insert(V).second) { - Result = unknown(); + Result = ObjectSizeOffsetEvaluator::unknown(); } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { Result = visitGEPOperator(*GEP); } else if (Instruction *I = dyn_cast<Instruction>(V)) { @@ -1112,22 +1121,22 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { isa<GlobalAlias>(V) || isa<GlobalVariable>(V)) { // Ignore values where we cannot do more than ObjectSizeVisitor. - Result = unknown(); + Result = ObjectSizeOffsetEvaluator::unknown(); } else { LLVM_DEBUG( dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: " << *V << '\n'); - Result = unknown(); + Result = ObjectSizeOffsetEvaluator::unknown(); } // Don't reuse CacheIt since it may be invalid at this point. - CacheMap[V] = Result; + CacheMap[V] = SizeOffsetWeakTrackingVH(Result); return Result; } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { +SizeOffsetValue ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) - return unknown(); + return ObjectSizeOffsetEvaluator::unknown(); // must be a VLA assert(I.isArrayAllocation()); @@ -1143,86 +1152,85 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { Value *Size = ConstantInt::get(ArraySize->getType(), DL.getTypeAllocSize(I.getAllocatedType())); Size = Builder.CreateMul(Size, ArraySize); - return std::make_pair(Size, Zero); + return SizeOffsetValue(Size, Zero); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallBase(CallBase &CB) { +SizeOffsetValue ObjectSizeOffsetEvaluator::visitCallBase(CallBase &CB) { std::optional<AllocFnsTy> FnData = getAllocationSize(&CB, TLI); if (!FnData) - return unknown(); + return ObjectSizeOffsetEvaluator::unknown(); // Handle strdup-like functions separately. 
if (FnData->AllocTy == StrDupLike) { // TODO: implement evaluation of strdup/strndup - return unknown(); + return ObjectSizeOffsetEvaluator::unknown(); } Value *FirstArg = CB.getArgOperand(FnData->FstParam); FirstArg = Builder.CreateZExtOrTrunc(FirstArg, IntTy); if (FnData->SndParam < 0) - return std::make_pair(FirstArg, Zero); + return SizeOffsetValue(FirstArg, Zero); Value *SecondArg = CB.getArgOperand(FnData->SndParam); SecondArg = Builder.CreateZExtOrTrunc(SecondArg, IntTy); Value *Size = Builder.CreateMul(FirstArg, SecondArg); - return std::make_pair(Size, Zero); + return SizeOffsetValue(Size, Zero); } -SizeOffsetEvalType -ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst&) { - return unknown(); +SizeOffsetValue +ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst &) { + return ObjectSizeOffsetEvaluator::unknown(); } -SizeOffsetEvalType -ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst&) { - return unknown(); +SizeOffsetValue +ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst &) { + return ObjectSizeOffsetEvaluator::unknown(); } -SizeOffsetEvalType -ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { - SizeOffsetEvalType PtrData = compute_(GEP.getPointerOperand()); - if (!bothKnown(PtrData)) - return unknown(); +SizeOffsetValue ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { + SizeOffsetValue PtrData = compute_(GEP.getPointerOperand()); + if (!PtrData.bothKnown()) + return ObjectSizeOffsetEvaluator::unknown(); Value *Offset = emitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true); - Offset = Builder.CreateAdd(PtrData.second, Offset); - return std::make_pair(PtrData.first, Offset); + Offset = Builder.CreateAdd(PtrData.Offset, Offset); + return SizeOffsetValue(PtrData.Size, Offset); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) { +SizeOffsetValue ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst &) { // clueless - return unknown(); + return ObjectSizeOffsetEvaluator::unknown(); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst &LI) { - return unknown(); +SizeOffsetValue ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst &LI) { + return ObjectSizeOffsetEvaluator::unknown(); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { +SizeOffsetValue ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { // Create 2 PHIs: one for size and another for offset. PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); // Insert right away in the cache to handle recursive PHIs. - CacheMap[&PHI] = std::make_pair(SizePHI, OffsetPHI); + CacheMap[&PHI] = SizeOffsetWeakTrackingVH(SizePHI, OffsetPHI); // Compute offset/size for each PHI incoming pointer. 
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { BasicBlock *IncomingBlock = PHI.getIncomingBlock(i); Builder.SetInsertPoint(IncomingBlock, IncomingBlock->getFirstInsertionPt()); - SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); + SizeOffsetValue EdgeData = compute_(PHI.getIncomingValue(i)); - if (!bothKnown(EdgeData)) { + if (!EdgeData.bothKnown()) { OffsetPHI->replaceAllUsesWith(PoisonValue::get(IntTy)); OffsetPHI->eraseFromParent(); InsertedInstructions.erase(OffsetPHI); SizePHI->replaceAllUsesWith(PoisonValue::get(IntTy)); SizePHI->eraseFromParent(); InsertedInstructions.erase(SizePHI); - return unknown(); + return ObjectSizeOffsetEvaluator::unknown(); } - SizePHI->addIncoming(EdgeData.first, IncomingBlock); - OffsetPHI->addIncoming(EdgeData.second, IncomingBlock); + SizePHI->addIncoming(EdgeData.Size, IncomingBlock); + OffsetPHI->addIncoming(EdgeData.Offset, IncomingBlock); } Value *Size = SizePHI, *Offset = OffsetPHI; @@ -1238,27 +1246,27 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { OffsetPHI->eraseFromParent(); InsertedInstructions.erase(OffsetPHI); } - return std::make_pair(Size, Offset); + return SizeOffsetValue(Size, Offset); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) { - SizeOffsetEvalType TrueSide = compute_(I.getTrueValue()); - SizeOffsetEvalType FalseSide = compute_(I.getFalseValue()); +SizeOffsetValue ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) { + SizeOffsetValue TrueSide = compute_(I.getTrueValue()); + SizeOffsetValue FalseSide = compute_(I.getFalseValue()); - if (!bothKnown(TrueSide) || !bothKnown(FalseSide)) - return unknown(); + if (!TrueSide.bothKnown() || !FalseSide.bothKnown()) + return ObjectSizeOffsetEvaluator::unknown(); if (TrueSide == FalseSide) return TrueSide; - Value *Size = Builder.CreateSelect(I.getCondition(), TrueSide.first, - FalseSide.first); - Value *Offset = Builder.CreateSelect(I.getCondition(), TrueSide.second, - FalseSide.second); - return std::make_pair(Size, Offset); + Value *Size = + Builder.CreateSelect(I.getCondition(), TrueSide.Size, FalseSide.Size); + Value *Offset = + Builder.CreateSelect(I.getCondition(), TrueSide.Offset, FalseSide.Offset); + return SizeOffsetValue(Size, Offset); } -SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) { +SizeOffsetValue ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) { LLVM_DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I << '\n'); - return unknown(); + return ObjectSizeOffsetEvaluator::unknown(); } diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 9ad60f774e9f..e87ae7d71fff 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -568,7 +568,6 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) { static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, - bool CloneWasSimplified, MemorySSA *MSSA) { MemoryAccess *InsnDefining = MA; if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) { @@ -578,18 +577,10 @@ static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, if (Instruction *NewDefMUDI = cast_or_null<Instruction>(VMap.lookup(DefMUDI))) { InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); - if (!CloneWasSimplified) - assert(InsnDefining && "Defining instruction cannot be nullptr."); - else if (!InsnDefining || isa<MemoryUse>(InsnDefining)) { + if 
(!InsnDefining || isa<MemoryUse>(InsnDefining)) { // The clone was simplified, it's no longer a MemoryDef, look up. - auto DefIt = DefMUD->getDefsIterator(); - // Since simplified clones only occur in single block cloning, a - // previous definition must exist, otherwise NewDefMUDI would not - // have been found in VMap. - assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() && - "Previous def must exist"); InsnDefining = getNewDefiningAccessForClone( - &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA); + DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA); } } } @@ -624,9 +615,9 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( NewInsn, getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap, - MPhiMap, CloneWasSimplified, MSSA), + MPhiMap, MSSA), /*Template=*/CloneWasSimplified ? nullptr : MUD, - /*CreationMustSucceed=*/CloneWasSimplified ? false : true); + /*CreationMustSucceed=*/false); if (NewUseOrDef) MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 67246afa2314..a5a18a538d76 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -37,6 +37,10 @@ static cl::opt<unsigned> CacheLineSize( cl::desc("Use this to override the target cache line size when " "specified by the user.")); +static cl::opt<unsigned> MinPageSize( + "min-page-size", cl::init(0), cl::Hidden, + cl::desc("Use this to override the target's minimum page size.")); + static cl::opt<unsigned> PredictableBranchThreshold( "predictable-branch-threshold", cl::init(99), cl::Hidden, cl::desc( @@ -762,6 +766,11 @@ TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const { return TTIImpl->getCacheAssociativity(Level); } +std::optional<unsigned> TargetTransformInfo::getMinPageSize() const { + return MinPageSize.getNumOccurrences() > 0 ? MinPageSize + : TTIImpl->getMinPageSize(); +} + unsigned TargetTransformInfo::getPrefetchDistance() const { return TTIImpl->getPrefetchDistance(); } diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp index 426f98c0c628..8562d8fbfa1e 100644 --- a/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/llvm/lib/Analysis/VFABIDemangling.cpp @@ -326,10 +326,6 @@ getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA, // Only vector parameters are used when determining the VF; uniform or // linear are left as scalars, so do not affect VF. if (Param.ParamKind == VFParamKind::Vector) { - // If the scalar function doesn't actually have a corresponding argument, - // reject the mapping. - if (Param.ParamPos >= Signature->getNumParams()) - return std::nullopt; Type *PTy = Signature->getParamType(Param.ParamPos); std::optional<ElementCount> EC = getElementCountForTy(ISA, PTy); @@ -427,6 +423,11 @@ std::optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName, if (Parameters.empty()) return std::nullopt; + // If the number of arguments of the scalar function does not match the + // vector variant we have just demangled then reject the mapping. + if (Parameters.size() != FTy->getNumParams()) + return std::nullopt; + // Figure out the number of lanes in vectors for this function variant. This // is easy for fixed length, as the vlen encoding just gives us the value // directly. 
However, if the vlen mangling indicated that this function diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 16d78c1ded6d..940ae9eb7ee2 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -485,7 +485,8 @@ bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { bool llvm::isValidAssumeForContext(const Instruction *Inv, const Instruction *CxtI, - const DominatorTree *DT) { + const DominatorTree *DT, + bool AllowEphemerals) { // There are two restrictions on the use of an assume: // 1. The assume must dominate the context (or the control flow must // reach the assume whenever it reaches the context). @@ -503,7 +504,7 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, // Don't let an assume affect itself - this would cause the problems // `isEphemeralValueOf` is trying to prevent, and it would also make // the loop below go out of bounds. - if (Inv == CxtI) + if (!AllowEphemerals && Inv == CxtI) return false; // The context comes first, but they're both in the same block. @@ -516,7 +517,7 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15)) return false; - return !isEphemeralValueOf(Inv, CxtI); + return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI); } // Inv and CxtI are in different blocks. @@ -1196,7 +1197,7 @@ static void computeKnownBitsFromOperator(const Operator *I, unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits(); KnownBits IndexBits(IndexBitWidth); computeKnownBits(Index, IndexBits, Depth + 1, Q); - TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy); + TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL); uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue(); KnownBits ScalingFactor(IndexBitWidth); // Multiply by current sizeof type. @@ -2128,7 +2129,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, } // If we have a zero-sized type, the index doesn't matter. Keep looping. - if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).isZero()) + if (GTI.getSequentialElementStride(Q.DL).isZero()) continue; // Fast path the constant operand case both for efficiency and so we don't diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index dda3380c04ea..33eed07c4629 100644 --- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -134,6 +134,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) { .Case("hidden_default_queue", true) .Case("hidden_completion_action", true) .Case("hidden_multigrid_sync_arg", true) + .Case("hidden_dynamic_lds_size", true) .Case("hidden_private_base", true) .Case("hidden_shared_base", true) .Case("hidden_queue_ptr", true) diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 30ea7eef3a12..b72c17aa6f54 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -182,11 +182,7 @@ public: /// Class responsible for emitting a DWARF v5 Accelerator Table. The only /// public function is emit(), which performs the actual emission. /// -/// The class is templated in its data type. This allows us to emit both dyamic -/// and static data entries. A callback abstract the logic to provide a CU -/// index for a given entry, which is different per data type, but identical -/// for every entry in the same table. 
-template <typename DataT> +/// A callback abstracts the logic to provide a CU index for a given entry. class Dwarf5AccelTableWriter : public AccelTableWriter { struct Header { uint16_t Version = 5; @@ -216,7 +212,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits; ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits; llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>( - const DataT &)> + const DWARF5AccelTableData &)> getIndexForEntry; MCSymbol *ContributionEnd = nullptr; MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start"); @@ -232,7 +228,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter { void emitBuckets() const; void emitStringOffsets() const; void emitAbbrevs() const; - void emitEntry(const DataT &Entry) const; + void emitEntry(const DWARF5AccelTableData &Entry) const; void emitData() const; public: @@ -240,8 +236,8 @@ public: AsmPrinter *Asm, const AccelTableBase &Contents, ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits, ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits, - llvm::function_ref< - std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(const DataT &)> + llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>( + const DWARF5AccelTableData &)> getIndexForEntry, bool IsSplitDwarf); @@ -346,8 +342,8 @@ void AppleAccelTableWriter::emitData() const { Asm->emitDwarfStringOffset(Hash->Name); Asm->OutStreamer->AddComment("Num DIEs"); Asm->emitInt32(Hash->Values.size()); - for (const auto *V : Hash->Values) - static_cast<const AppleAccelTableData *>(V)->emit(Asm); + for (const auto *V : Hash->getValues<const AppleAccelTableData *>()) + V->emit(Asm); PrevHash = Hash->HashValue; } // Emit the final end marker for the bucket. 
@@ -370,8 +366,7 @@ DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die, const bool IsTU) : OffsetVal(&Die), DieTag(Die.getTag()), UnitID(UnitID), IsTU(IsTU) {} -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) { +void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) { assert(CompUnitCount > 0 && "Index must have at least one CU."); AsmPrinter *Asm = Ctx.Asm; @@ -417,14 +412,13 @@ static uint32_t constructAbbreviationTag( AbbrvTag |= Tag << LowerBitSize; return AbbrvTag; } -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() { +void Dwarf5AccelTableWriter::populateAbbrevsMap() { for (auto &Bucket : Contents.getBuckets()) { for (auto *Hash : Bucket) { - for (auto *Value : Hash->Values) { + for (auto *Value : Hash->getValues<DWARF5AccelTableData *>()) { std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet = - getIndexForEntry(*static_cast<const DataT *>(Value)); - unsigned Tag = static_cast<const DataT *>(Value)->getDieTag(); + getIndexForEntry(*Value); + unsigned Tag = Value->getDieTag(); uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet); if (Abbreviations.count(AbbrvTag) == 0) { SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA; @@ -438,8 +432,7 @@ void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() { } } -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::emitCUList() const { +void Dwarf5AccelTableWriter::emitCUList() const { for (const auto &CU : enumerate(CompUnits)) { Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index())); if (std::holds_alternative<MCSymbol *>(CU.value())) @@ -449,8 +442,7 @@ void Dwarf5AccelTableWriter<DataT>::emitCUList() const { } } -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::emitTUList() const { +void Dwarf5AccelTableWriter::emitTUList() const { for (const auto &TU : enumerate(TypeUnits)) { Asm->OutStreamer->AddComment("Type unit " + Twine(TU.index())); if (std::holds_alternative<MCSymbol *>(TU.value())) @@ -462,8 +454,7 @@ void Dwarf5AccelTableWriter<DataT>::emitTUList() const { } } -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::emitBuckets() const { +void Dwarf5AccelTableWriter::emitBuckets() const { uint32_t Index = 1; for (const auto &Bucket : enumerate(Contents.getBuckets())) { Asm->OutStreamer->AddComment("Bucket " + Twine(Bucket.index())); @@ -472,8 +463,7 @@ void Dwarf5AccelTableWriter<DataT>::emitBuckets() const { } } -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const { +void Dwarf5AccelTableWriter::emitStringOffsets() const { for (const auto &Bucket : enumerate(Contents.getBuckets())) { for (auto *Hash : Bucket.value()) { DwarfStringPoolEntryRef String = Hash->Name; @@ -484,8 +474,7 @@ void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const { } } -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const { +void Dwarf5AccelTableWriter::emitAbbrevs() const { Asm->OutStreamer->emitLabel(AbbrevStart); for (const auto &Abbrev : Abbreviations) { Asm->OutStreamer->AddComment("Abbrev code"); @@ -506,8 +495,8 @@ void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const { Asm->OutStreamer->emitLabel(AbbrevEnd); } -template <typename DataT> -void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const { +void Dwarf5AccelTableWriter::emitEntry( + const DWARF5AccelTableData &Entry) const { std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet = getIndexForEntry(Entry); 
uint32_t AbbrvTag = constructAbbreviationTag(Entry.getDieTag(), EntryRet); @@ -537,27 +526,26 @@ void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const { } } -template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const { +void Dwarf5AccelTableWriter::emitData() const { Asm->OutStreamer->emitLabel(EntryPool); for (auto &Bucket : Contents.getBuckets()) { for (auto *Hash : Bucket) { // Remember to emit the label for our offset. Asm->OutStreamer->emitLabel(Hash->Sym); for (const auto *Value : Hash->Values) - emitEntry(*static_cast<const DataT *>(Value)); + emitEntry(*static_cast<const DWARF5AccelTableData *>(Value)); Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString()); Asm->emitInt8(0); } } } -template <typename DataT> -Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter( +Dwarf5AccelTableWriter::Dwarf5AccelTableWriter( AsmPrinter *Asm, const AccelTableBase &Contents, ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits, ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits, - llvm::function_ref< - std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(const DataT &)> + llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>( + const DWARF5AccelTableData &)> getIndexForEntry, bool IsSplitDwarf) : AccelTableWriter(Asm, Contents, false), @@ -570,7 +558,7 @@ Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter( populateAbbrevsMap(); } -template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() { +void Dwarf5AccelTableWriter::emit() { Header.emit(*this); emitCUList(); emitTUList(); @@ -635,7 +623,7 @@ void llvm::emitDWARF5AccelTable( DIEInteger::BestForm(/*IsSigned*/ false, CompUnits.size() - 1); dwarf::Form TUIndexForm = DIEInteger::BestForm(/*IsSigned*/ false, TypeUnits.size() - 1); - Dwarf5AccelTableWriter<DWARF5AccelTableData>( + Dwarf5AccelTableWriter( Asm, Contents, CompUnits, TypeUnits, [&](const DWARF5AccelTableData &Entry) -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> { @@ -667,8 +655,7 @@ void llvm::emitDWARF5AccelTable( getIndexForEntry) { std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits; Contents.finalize(Asm, "names"); - Dwarf5AccelTableWriter<DWARF5AccelTableData>(Asm, Contents, CUs, TypeUnits, - getIndexForEntry, false) + Dwarf5AccelTableWriter(Asm, Contents, CUs, TypeUnits, getIndexForEntry, false) .emit(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 4dd27702786e..7df1c82bf357 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -140,6 +141,26 @@ static cl::opt<std::string> BasicBlockProfileDump( "performed with -basic-block-sections=labels. Enabling this " "flag during in-process ThinLTO is not supported.")); +// This is a replication of fields of object::PGOAnalysisMap::Features. It +// should match the order of the fields so that +// `object::PGOAnalysisMap::Features::decode(PgoAnalysisMapFeatures.getBits())` +// succeeds. 
+enum class PGOMapFeaturesEnum { + FuncEntryCount, + BBFreq, + BrProb, +}; +static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures( + "pgo-analysis-map", cl::Hidden, cl::CommaSeparated, + cl::values(clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, + "func-entry-count", "Function Entry Count"), + clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq", + "Basic Block Frequency"), + clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", + "Branch Probability")), + cl::desc("Enable extended information within the BBAddrMap that is " + "extracted from PGO related analysis.")); + const char DWARFGroupName[] = "dwarf"; const char DWARFGroupDescription[] = "DWARF Emission"; const char DbgTimerName[] = "emit"; @@ -428,6 +449,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addRequired<GCModuleInfo>(); AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); + AU.addRequired<MachineBranchProbabilityInfo>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1379,7 +1401,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion(); OutStreamer->emitInt8(BBAddrMapVersion); OutStreamer->AddComment("feature"); - OutStreamer->emitInt8(0); + auto FeaturesBits = static_cast<uint8_t>(PgoAnalysisMapFeatures.getBits()); + OutStreamer->emitInt8(FeaturesBits); OutStreamer->AddComment("function address"); OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize()); OutStreamer->AddComment("number of basic blocks"); @@ -1409,6 +1432,51 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); PrevMBBEndSymbol = MBB.getEndSymbol(); } + + if (FeaturesBits != 0) { + assert(BBAddrMapVersion >= 2 && + "PGOAnalysisMap only supports version 2 or later"); + + auto FeatEnable = + cantFail(object::PGOAnalysisMap::Features::decode(FeaturesBits)); + + if (FeatEnable.FuncEntryCount) { + OutStreamer->AddComment("function entry count"); + auto MaybeEntryCount = MF.getFunction().getEntryCount(); + OutStreamer->emitULEB128IntValue( + MaybeEntryCount ? MaybeEntryCount->getCount() : 0); + } + const MachineBlockFrequencyInfo *MBFI = + FeatEnable.BBFreq + ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() + : nullptr; + const MachineBranchProbabilityInfo *MBPI = + FeatEnable.BrProb ? &getAnalysis<MachineBranchProbabilityInfo>() + : nullptr; + + if (FeatEnable.BBFreq || FeatEnable.BrProb) { + for (const MachineBasicBlock &MBB : MF) { + if (FeatEnable.BBFreq) { + OutStreamer->AddComment("basic block frequency"); + OutStreamer->emitULEB128IntValue( + MBFI->getBlockFreq(&MBB).getFrequency()); + } + if (FeatEnable.BrProb) { + unsigned SuccCount = MBB.succ_size(); + OutStreamer->AddComment("basic block successor count"); + OutStreamer->emitULEB128IntValue(SuccCount); + for (const MachineBasicBlock *SuccMBB : MBB.successors()) { + OutStreamer->AddComment("successor BB ID"); + OutStreamer->emitULEB128IntValue(SuccMBB->getBBID()->BaseID); + OutStreamer->AddComment("successor branch probability"); + OutStreamer->emitULEB128IntValue( + MBPI->getEdgeProbability(&MBB, SuccMBB).getNumerator()); + } + } + } + } + } + OutStreamer->popSection(); } @@ -1934,8 +2002,14 @@ void AsmPrinter::emitFunctionBody() { // Emit section containing BB address offsets and their metadata, when // BB labels are requested for this function. Skip empty functions. 
- if (MF->hasBBLabels() && HasAnyRealCode) - emitBBAddrMapSection(*MF); + if (HasAnyRealCode) { + if (MF->hasBBLabels()) + emitBBAddrMapSection(*MF); + else if (PgoAnalysisMapFeatures.getBits() != 0) + MF->getContext().reportWarning( + SMLoc(), "pgo-analysis-map is enabled for function " + MF->getName() + + " but it does not have labels"); + } // Emit sections containing instruction and function PCs. emitPCSections(*MF); diff --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp index 5d5f3c3da481..901542e8507b 100644 --- a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp +++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp @@ -196,7 +196,7 @@ class BasicBlockPathCloning : public MachineFunctionPass { public: static char ID; - BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr; BasicBlockPathCloning() : MachineFunctionPass(ID) { initializeBasicBlockPathCloningPass(*PassRegistry::getPassRegistry()); @@ -218,7 +218,7 @@ INITIALIZE_PASS_BEGIN( BasicBlockPathCloning, "bb-path-cloning", "Applies path clonings for the -basic-block-sections=list option", false, false) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) INITIALIZE_PASS_END( BasicBlockPathCloning, "bb-path-cloning", "Applies path clonings for the -basic-block-sections=list option", false, @@ -230,13 +230,14 @@ bool BasicBlockPathCloning::runOnMachineFunction(MachineFunction &MF) { if (hasInstrProfHashMismatch(MF)) return false; - return ApplyCloning(MF, getAnalysis<BasicBlockSectionsProfileReader>() - .getClonePathsForFunction(MF.getName())); + return ApplyCloning(MF, + getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>() + .getClonePathsForFunction(MF.getName())); } void BasicBlockPathCloning::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<BasicBlockSectionsProfileReader>(); + AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp index 42997d2287d6..94b5a503fbd0 100644 --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -103,7 +103,7 @@ class BasicBlockSections : public MachineFunctionPass { public: static char ID; - BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; + BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); @@ -128,7 +128,7 @@ INITIALIZE_PASS_BEGIN( "Prepares for basic block sections, by splitting functions " "into clusters of basic blocks.", false, false) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare", "Prepares for basic block sections, by splitting functions " "into clusters of basic blocks.", @@ -306,7 +306,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo; if (BBSectionsType == BasicBlockSection::List) { auto [HasProfile, ClusterInfo] = - getAnalysis<BasicBlockSectionsProfileReader>() + getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>() .getClusterInfoForFunction(MF.getName()); if (!HasProfile) 
return false; @@ -362,7 +362,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<BasicBlockSectionsProfileReader>(); + AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index 15b6f63e8632..79e42d9304df 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -30,8 +30,9 @@ using namespace llvm; -char BasicBlockSectionsProfileReader::ID = 0; -INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", +char BasicBlockSectionsProfileReaderWrapperPass::ID = 0; +INITIALIZE_PASS(BasicBlockSectionsProfileReaderWrapperPass, + "bbsections-profile-reader", "Reads and parses a basic block sections profile.", false, false) @@ -395,11 +396,11 @@ Error BasicBlockSectionsProfileReader::ReadProfile() { } } -bool BasicBlockSectionsProfileReader::doInitialization(Module &M) { - if (!MBuf) +bool BasicBlockSectionsProfileReaderWrapperPass::doInitialization(Module &M) { + if (!BBSPR.MBuf) return false; // Get the function name to debug info filename mapping. - FunctionNameToDIFilename.clear(); + BBSPR.FunctionNameToDIFilename.clear(); for (const Function &F : M) { SmallString<128> DIFilename; if (F.isDeclaration()) @@ -411,15 +412,46 @@ bool BasicBlockSectionsProfileReader::doInitialization(Module &M) { DIFilename = sys::path::remove_leading_dotslash(CU->getFilename()); } [[maybe_unused]] bool inserted = - FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second; + BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename) + .second; assert(inserted); } - if (auto Err = ReadProfile()) + if (auto Err = BBSPR.ReadProfile()) report_fatal_error(std::move(Err)); return false; } -ImmutablePass * -llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { - return new BasicBlockSectionsProfileReader(Buf); +AnalysisKey BasicBlockSectionsProfileReaderAnalysis::Key; + +BasicBlockSectionsProfileReader +BasicBlockSectionsProfileReaderAnalysis::run(Function &F, + FunctionAnalysisManager &AM) { + return BasicBlockSectionsProfileReader(TM->getBBSectionsFuncListBuf()); +} + +bool BasicBlockSectionsProfileReaderWrapperPass::isFunctionHot( + StringRef FuncName) const { + return BBSPR.isFunctionHot(FuncName); +} + +std::pair<bool, SmallVector<BBClusterInfo>> +BasicBlockSectionsProfileReaderWrapperPass::getClusterInfoForFunction( + StringRef FuncName) const { + return BBSPR.getClusterInfoForFunction(FuncName); +} + +SmallVector<SmallVector<unsigned>> +BasicBlockSectionsProfileReaderWrapperPass::getClonePathsForFunction( + StringRef FuncName) const { + return BBSPR.getClonePathsForFunction(FuncName); +} + +BasicBlockSectionsProfileReader & +BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() { + return BBSPR; +} + +ImmutablePass *llvm::createBasicBlockSectionsProfileReaderWrapperPass( + const MemoryBuffer *Buf) { + return new BasicBlockSectionsProfileReaderWrapperPass(Buf); } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 7b73a7b11ddf..418066452c17 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -30,7 +30,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeCFIFixupPass(Registry); initializeCFIInstrInserterPass(Registry); 
initializeCheckDebugMachineModulePass(Registry); - initializeCodeGenPreparePass(Registry); + initializeCodeGenPrepareLegacyPassPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeDebugifyMachineModulePass(Registry); initializeDetectDeadLanesPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 6e99fb133e26..b8bfb9742bfb 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -301,7 +302,8 @@ using ValueToSExts = MapVector<Value *, SExts>; class TypePromotionTransaction; -class CodeGenPrepare : public FunctionPass { +class CodeGenPrepare { + friend class CodeGenPrepareLegacyPass; const TargetMachine *TM = nullptr; const TargetSubtargetInfo *SubtargetInfo = nullptr; const TargetLowering *TLI = nullptr; @@ -365,6 +367,8 @@ class CodeGenPrepare : public FunctionPass { std::unique_ptr<DominatorTree> DT; public: + CodeGenPrepare(){}; + CodeGenPrepare(const TargetMachine *TM) : TM(TM){}; /// If encounter huge function, we need to limit the build time. bool IsHugeFunc = false; @@ -374,15 +378,7 @@ public: /// to insert such BB into FreshBBs for huge function. SmallSet<BasicBlock *, 32> FreshBBs; - static char ID; // Pass identification, replacement for typeid - - CodeGenPrepare() : FunctionPass(ID) { - initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void releaseMemory() override { + void releaseMemory() { // Clear per function information. InsertedInsts.clear(); PromotedInsts.clear(); @@ -391,17 +387,7 @@ public: BFI.reset(); } - StringRef getPassName() const override { return "CodeGen Prepare"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - // FIXME: When we can selectively preserve passes, preserve the domtree. - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetPassConfig>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>(); - } + bool run(Function &F, FunctionAnalysisManager &AM); private: template <typename F> @@ -488,45 +474,108 @@ private: bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); void verifyBFIUpdates(Function &F); + bool _run(Function &F); +}; + +class CodeGenPrepareLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + + CodeGenPrepareLegacyPass() : FunctionPass(ID) { + initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { return "CodeGen Prepare"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // FIXME: When we can selectively preserve passes, preserve the domtree. 
+ AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>(); + } }; } // end anonymous namespace -char CodeGenPrepare::ID = 0; +char CodeGenPrepareLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, +bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); + CodeGenPrepare CGP(TM); + CGP.DL = &F.getParent()->getDataLayout(); + CGP.SubtargetInfo = TM->getSubtargetImpl(F); + CGP.TLI = CGP.SubtargetInfo->getTargetLowering(); + CGP.TRI = CGP.SubtargetInfo->getRegisterInfo(); + CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI)); + CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI)); + CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto BBSPRWP = + getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>(); + CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr; + + return CGP._run(F); +} + +INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE, "Optimize for code generation", false, false) -INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", - false, false) +INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE, + "Optimize for code generation", false, false) -FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } +FunctionPass *llvm::createCodeGenPrepareLegacyPass() { + return new CodeGenPrepareLegacyPass(); +} -bool CodeGenPrepare::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; +PreservedAnalyses CodeGenPreparePass::run(Function &F, + FunctionAnalysisManager &AM) { + CodeGenPrepare CGP(TM); - DL = &F.getParent()->getDataLayout(); + bool Changed = CGP.run(F, AM); + if (!Changed) + return PreservedAnalyses::all(); - bool EverMadeChange = false; + PreservedAnalyses PA; + PA.preserve<TargetLibraryAnalysis>(); + PA.preserve<TargetIRAnalysis>(); + PA.preserve<LoopAnalysis>(); + return PA; +} - TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); +bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) { + DL = &F.getParent()->getDataLayout(); SubtargetInfo = TM->getSubtargetImpl(F); TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); - TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + TLInfo = &AM.getResult<TargetLibraryAnalysis>(F); + TTI = &AM.getResult<TargetIRAnalysis>(F); + LI = &AM.getResult<LoopAnalysis>(F); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); - PSI = 
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); + PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); BBSectionsProfileReader = - getAnalysisIfAvailable<BasicBlockSectionsProfileReader>(); + AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F); + return _run(F); +} + +bool CodeGenPrepare::_run(Function &F) { + bool EverMadeChange = false; + OptSize = F.hasOptSize(); // Use the basic-block-sections profile to promote hot functions to .text.hot // if requested. @@ -4776,7 +4825,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); ConstantOffset += SL->getElementOffset(Idx); } else { - TypeSize TS = DL.getTypeAllocSize(GTI.getIndexedType()); + TypeSize TS = GTI.getSequentialElementStride(DL); if (TS.isNonZero()) { // The optimisations below currently only work for fixed offsets. if (TS.isScalable()) diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp index c0ce37091933..894ab9a0486a 100644 --- a/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/llvm/lib/CodeGen/GCRootLowering.cpp @@ -27,6 +27,15 @@ using namespace llvm; +/// Lower barriers out of existence (if the associated GCStrategy hasn't +/// already done so...), and insert initializing stores to roots as a defensive +/// measure. Given we're going to report all roots live at all safepoints, we +/// need to be able to ensure each root has been initialized by the point the +/// first safepoint is reached. This really should have been done by the +/// frontend, but the old API made this non-obvious, so we do a potentially +/// redundant store just in case. +static bool DoLowering(Function &F, GCStrategy &S); + namespace { /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or @@ -34,8 +43,6 @@ namespace { /// directed by the GCStrategy. It also performs automatic root initialization /// and custom intrinsic lowering. class LowerIntrinsics : public FunctionPass { - bool DoLowering(Function &F, GCStrategy &S); - public: static char ID; @@ -72,6 +79,19 @@ public: }; } +PreservedAnalyses GCLoweringPass::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &Info = FAM.getResult<GCFunctionAnalysis>(F); + + bool Changed = DoLowering(F, Info.getStrategy()); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} + // ----------------------------------------------------------------------------- INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false, @@ -178,14 +198,7 @@ bool LowerIntrinsics::runOnFunction(Function &F) { return DoLowering(F, S); } -/// Lower barriers out of existance (if the associated GCStrategy hasn't -/// already done so...), and insert initializing stores to roots as a defensive -/// measure. Given we're going to report all roots live at all safepoints, we -/// need to be able to ensure each root has been initialized by the point the -/// first safepoint is reached. This really should have been done by the -/// frontend, but the old API made this non-obvious, so we do a potentially -/// redundant store just in case. 
-bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) { +bool DoLowering(Function &F, GCStrategy &S) { SmallVector<AllocaInst *, 32> Roots; bool MadeChange = false; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index bea29642cd00..6708f2baa5ed 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -751,16 +751,91 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); - // FIXME: At the moment we don't do any splitting optimizations here like - // SelectionDAG does, so this worklist only has one entry. while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.pop_back_val(); + + unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; + // For optimized builds, lower large range as a balanced binary tree. + if (NumClusters > 3 && + MF->getTarget().getOptLevel() != CodeGenOptLevel::None && + !DefaultMBB->getParent()->getFunction().hasMinSize()) { + splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB, MIB); + continue; + } + if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB)) return false; } return true; } +void IRTranslator::splitWorkItem(SwitchCG::SwitchWorkList &WorkList, + const SwitchCG::SwitchWorkListItem &W, + Value *Cond, MachineBasicBlock *SwitchMBB, + MachineIRBuilder &MIB) { + using namespace SwitchCG; + assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && + "Clusters not sorted?"); + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + auto [LastLeft, FirstRight, LeftProb, RightProb] = + SL->computeSplitWorkItemInfo(W); + + // Use the first element on the right as pivot since we will make less-than + // comparisons against it. + CaseClusterIt PivotCluster = FirstRight; + assert(PivotCluster > W.FirstCluster); + assert(PivotCluster <= W.LastCluster); + + CaseClusterIt FirstLeft = W.FirstCluster; + CaseClusterIt LastRight = W.LastCluster; + + const ConstantInt *Pivot = PivotCluster->Low; + + // New blocks will be inserted immediately after the current one. + MachineFunction::iterator BBI(W.MBB); + ++BBI; + + // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, + // we can branch to its destination directly if it's squeezed exactly in + // between the known lower bound and Pivot - 1. + MachineBasicBlock *LeftMBB; + if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range && + FirstLeft->Low == W.GE && + (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) { + LeftMBB = FirstLeft->MBB; + } else { + LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, LeftMBB); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); + } + + // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a + // single cluster, RHS.Low == Pivot, and we can branch to its destination + // directly if RHS.High equals the current upper bound. 
+ MachineBasicBlock *RightMBB; + if (FirstRight == LastRight && FirstRight->Kind == CC_Range && W.LT && + (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) { + RightMBB = FirstRight->MBB; + } else { + RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); + FuncInfo.MF->insert(BBI, RightMBB); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); + } + + // Create the CaseBlock record that will be used to lower the branch. + CaseBlock CB(ICmpInst::Predicate::ICMP_SLT, false, Cond, Pivot, nullptr, + LeftMBB, RightMBB, W.MBB, MIB.getDebugLoc(), LeftProb, + RightProb); + + if (W.MBB == SwitchMBB) + emitSwitchCase(CB, SwitchMBB, MIB); + else + SL->SwitchCases.push_back(CB); +} + void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT, MachineBasicBlock *MBB) { // Emit the code for the jump table @@ -1545,7 +1620,7 @@ bool IRTranslator::translateGetElementPtr(const User &U, Offset += DL->getStructLayout(StTy)->getElementOffset(Field); continue; } else { - uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + uint64_t ElementSize = GTI.getSequentialElementStride(*DL); // If this is a scalar constant or a splat vector of constants, // handle it quickly. diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 37e7153be572..def7f6ebeb01 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -149,7 +150,8 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI, return moreElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); - return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize; + return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized + : UnableToLegalize; default: LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; @@ -531,6 +533,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(REM_F); case TargetOpcode::G_FPOW: RTLIBCASE(POW_F); + case TargetOpcode::G_FPOWI: + RTLIBCASE(POWI_F); case TargetOpcode::G_FMA: RTLIBCASE(FMA_F); case TargetOpcode::G_FSIN: @@ -567,7 +571,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { /// True if an instruction is in tail position in its caller. Intended for /// legalizing libcalls as tail calls when possible. 
-static bool isLibCallInTailPosition(MachineInstr &MI, +static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, + MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI) { MachineBasicBlock &MBB = *MI.getParent(); @@ -596,17 +601,12 @@ static bool isLibCallInTailPosition(MachineInstr &MI, // RET_ReallyLR implicit $x0 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end()); if (Next != MBB.instr_end() && Next->isCopy()) { - switch (MI.getOpcode()) { - default: - llvm_unreachable("unsupported opcode"); - case TargetOpcode::G_BZERO: + if (MI.getOpcode() == TargetOpcode::G_BZERO) return false; - case TargetOpcode::G_MEMCPY: - case TargetOpcode::G_MEMMOVE: - case TargetOpcode::G_MEMSET: - break; - } + // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the + // memcpy/etc routines return the same parameter. For others it will be the + // returned value. Register VReg = MI.getOperand(0).getReg(); if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg()) return false; @@ -622,7 +622,7 @@ static bool isLibCallInTailPosition(MachineInstr &MI, if (Ret->getNumImplicitOperands() != 1) return false; - if (PReg != Ret->getOperand(0).getReg()) + if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg()) return false; // Skip over the COPY that we just validated. @@ -639,34 +639,64 @@ LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args, - const CallingConv::ID CC) { + const CallingConv::ID CC, LostDebugLocObserver &LocObserver, + MachineInstr *MI) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); CallLowering::CallLoweringInfo Info; Info.CallConv = CC; Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = Result; + if (MI) + Info.IsTailCall = + (Result.Ty->isVoidTy() || + Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) && + isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(), + *MIRBuilder.getMRI()); + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) return LegalizerHelper::UnableToLegalize; + if (MI && Info.LoweredTailCall) { + assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + + // Check debug locations before removing the return. + LocObserver.checkpoint(true); + + // We must have a return following the call (or debug insts) to get past + // isLibCallInTailPosition. + do { + MachineInstr *Next = MI->getNextNode(); + assert(Next && + (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) && + "Expected instr following MI to be return or debug inst?"); + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. + Next->eraseFromParent(); + } while (MI->getNextNode()); + + // We expect to lose the debug location from the return. 
+ LocObserver.checkpoint(false); + } return LegalizerHelper::Legalized; } LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, - ArrayRef<CallLowering::ArgInfo> Args) { + ArrayRef<CallLowering::ArgInfo> Args, + LostDebugLocObserver &LocObserver, MachineInstr *MI) { auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall); - return createLibcall(MIRBuilder, Name, Result, Args, CC); + return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI); } // Useful for libcalls where all operands have the same type. static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, - Type *OpType) { + Type *OpType, LostDebugLocObserver &LocObserver) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); // FIXME: What does the original arg index mean here? @@ -674,7 +704,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) Args.push_back({MO.getReg(), OpType, 0}); return createLibcall(MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), OpType, 0}, Args); + {MI.getOperand(0).getReg(), OpType, 0}, Args, + LocObserver, &MI); } LegalizerHelper::LegalizeResult @@ -733,8 +764,9 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); Info.Callee = MachineOperand::CreateES(Name); Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0); - Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() && - isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI); + Info.IsTailCall = + MI.getOperand(MI.getNumOperands() - 1).getImm() && + isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI); std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); if (!CLI.lowerCall(MIRBuilder, Info)) @@ -765,6 +797,132 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, return LegalizerHelper::Legalized; } +static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + auto &AtomicMI = cast<GMemOperation>(MI); + auto &MMO = AtomicMI.getMMO(); + auto Ordering = MMO.getMergedOrdering(); + LLT MemType = MMO.getMemoryType(); + uint64_t MemSize = MemType.getSizeInBytes(); + if (MemType.isVector()) + return RTLIB::UNKNOWN_LIBCALL; + +#define LCALLS(A, B) \ + { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } +#define LCALL5(A) \ + LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16) + switch (Opc) { + case TargetOpcode::G_ATOMIC_CMPXCHG: + case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { + const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)}; + return getOutlineAtomicHelper(LC, Ordering, MemSize); + } + case TargetOpcode::G_ATOMICRMW_XCHG: { + const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)}; + return getOutlineAtomicHelper(LC, Ordering, MemSize); + } + case TargetOpcode::G_ATOMICRMW_ADD: + case TargetOpcode::G_ATOMICRMW_SUB: { + const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)}; + return getOutlineAtomicHelper(LC, Ordering, MemSize); + } + case TargetOpcode::G_ATOMICRMW_AND: { + const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)}; + return getOutlineAtomicHelper(LC, Ordering, MemSize); + } + 
case TargetOpcode::G_ATOMICRMW_OR: { + const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)}; + return getOutlineAtomicHelper(LC, Ordering, MemSize); + } + case TargetOpcode::G_ATOMICRMW_XOR: { + const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)}; + return getOutlineAtomicHelper(LC, Ordering, MemSize); + } + default: + return RTLIB::UNKNOWN_LIBCALL; + } +#undef LCALLS +#undef LCALL5 +} + +static LegalizerHelper::LegalizeResult +createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) { + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + + Type *RetTy; + SmallVector<Register> RetRegs; + SmallVector<CallLowering::ArgInfo, 3> Args; + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case TargetOpcode::G_ATOMIC_CMPXCHG: + case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { + Register Success; + LLT SuccessLLT; + auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] = + MI.getFirst4RegLLTs(); + RetRegs.push_back(Ret); + RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits()); + if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) { + std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New, + NewLLT) = MI.getFirst5RegLLTs(); + RetRegs.push_back(Success); + RetTy = StructType::get( + Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())}); + } + Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0}); + Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0}); + Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0}); + break; + } + case TargetOpcode::G_ATOMICRMW_XCHG: + case TargetOpcode::G_ATOMICRMW_ADD: + case TargetOpcode::G_ATOMICRMW_SUB: + case TargetOpcode::G_ATOMICRMW_AND: + case TargetOpcode::G_ATOMICRMW_OR: + case TargetOpcode::G_ATOMICRMW_XOR: { + auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs(); + RetRegs.push_back(Ret); + RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits()); + if (Opc == TargetOpcode::G_ATOMICRMW_AND) + Val = + MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val) + .getReg(0); + else if (Opc == TargetOpcode::G_ATOMICRMW_SUB) + Val = + MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val) + .getReg(0); + Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0}); + Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0}); + break; + } + default: + llvm_unreachable("unsupported opcode"); + } + + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI); + const char *Name = TLI.getLibcallName(RTLibcall); + + // Unsupported libcall on the target. + if (!Name) { + LLVM_DEBUG(dbgs() << ".. .. 
Could not find libcall name for " + << MIRBuilder.getTII().getName(Opc) << "\n"); + return LegalizerHelper::UnableToLegalize; + } + + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0); + + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) + return LegalizerHelper::UnableToLegalize; + + return LegalizerHelper::Legalized; +} + static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType) { auto ToMVT = MVT::getVT(ToType); @@ -789,11 +947,11 @@ static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, - Type *FromType) { + Type *FromType, LostDebugLocObserver &LocObserver) { RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); - return createLibcall(MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), ToType, 0}, - {{MI.getOperand(1).getReg(), FromType, 0}}); + return createLibcall( + MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0}, + {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI); } static RTLIB::Libcall @@ -829,7 +987,8 @@ getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) { // LegalizerHelper::LegalizeResult LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &MRI = *MIRBuilder.getMRI(); @@ -850,7 +1009,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, auto Res = createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), + LocObserver, nullptr); if (Res != LegalizerHelper::Legalized) return Res; @@ -867,7 +1027,8 @@ LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, // content of memory region. LegalizerHelper::LegalizeResult LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &MRI = *MIRBuilder.getMRI(); @@ -892,7 +1053,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); return createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), + LocObserver, nullptr); } // The function is used to legalize operations that set default environment @@ -902,7 +1064,8 @@ LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, // it is not true, the target must provide custom lowering. 
LegalizerHelper::LegalizeResult LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, - MachineInstr &MI) { + MachineInstr &MI, + LostDebugLocObserver &LocObserver) { const DataLayout &DL = MIRBuilder.getDataLayout(); auto &MF = MIRBuilder.getMF(); auto &Ctx = MF.getFunction().getContext(); @@ -919,7 +1082,8 @@ LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); return createLibcall(MIRBuilder, RTLibcall, CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), - CallLowering::ArgInfo({ Dest.getReg(), StatePtrTy, 0})); + CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), + LocObserver, &MI); } LegalizerHelper::LegalizeResult @@ -938,7 +1102,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); Type *HLTy = IntegerType::get(Ctx, Size); - auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); if (Status != Legalized) return Status; break; @@ -974,7 +1138,28 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); return UnableToLegalize; } - auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FPOWI: { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); + Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); + Type *ITy = IntegerType::get( + Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits()); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); + return UnableToLegalize; + } + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + std::initializer_list<CallLowering::ArgInfo> Args = { + {MI.getOperand(1).getReg(), HLTy, 0}, + {MI.getOperand(2).getReg(), ITy, 1}}; + LegalizeResult Status = + createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, + Args, LocObserver, &MI); if (Status != Legalized) return Status; break; @@ -985,7 +1170,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg())); if (!FromTy || !ToTy) return UnableToLegalize; - LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy ); + LegalizeResult Status = + conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver); if (Status != Legalized) return Status; break; @@ -1000,7 +1186,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), - FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); + FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), + LocObserver); if (Status != Legalized) return Status; break; @@ -1015,7 +1202,21 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx)); + FromSize == 32 ? 
Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), + LocObserver); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_ATOMICRMW_XCHG: + case TargetOpcode::G_ATOMICRMW_ADD: + case TargetOpcode::G_ATOMICRMW_SUB: + case TargetOpcode::G_ATOMICRMW_AND: + case TargetOpcode::G_ATOMICRMW_OR: + case TargetOpcode::G_ATOMICRMW_XOR: + case TargetOpcode::G_ATOMIC_CMPXCHG: + case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { + auto Status = createAtomicLibcall(MIRBuilder, MI); if (Status != Legalized) return Status; break; @@ -1032,19 +1233,20 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { return Result; } case TargetOpcode::G_GET_FPMODE: { - LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } case TargetOpcode::G_SET_FPMODE: { - LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; } case TargetOpcode::G_RESET_FPMODE: { - LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI); + LegalizeResult Result = + createResetStateLibcall(MIRBuilder, MI, LocObserver); if (Result != Legalized) return Result; break; @@ -2831,6 +3033,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMIN: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMINIMUM: @@ -4515,8 +4718,13 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/}); GISEL_VECREDUCE_CASES_NONSEQ return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_VECREDUCE_SEQ_FADD: + case TargetOpcode::G_VECREDUCE_SEQ_FMUL: + return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy); case G_SHUFFLE_VECTOR: return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy); + case G_FPOWI: + return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/}); default: return UnableToLegalize; } @@ -4747,6 +4955,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( } LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI, + unsigned int TypeIdx, + LLT NarrowTy) { + auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] = + MI.getFirst3RegLLTs(); + if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy || + DstTy != NarrowTy) + return UnableToLegalize; + + assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD || + MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) && + "Unexpected vecreduce opcode"); + unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD + ? 
TargetOpcode::G_FADD + : TargetOpcode::G_FMUL; + + SmallVector<Register> SplitSrcs; + unsigned NumParts = SrcTy.getNumElements(); + extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs); + Register Acc = ScalarReg; + for (unsigned i = 0; i < NumParts; i++) + Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]}) + .getReg(0); + + MIRBuilder.buildCopy(DstReg, Acc); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg, LLT SrcTy, LLT NarrowTy, unsigned ScalarOpc) { diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index 7304bfef55cb..e8391afb8e3f 100644 --- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -12,9 +12,6 @@ namespace llvm { DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { - if (S.empty() && !Strings.empty()) - return EmptyString; - if (Translator) S = Translator(S); auto I = Strings.insert({S, DwarfStringPoolEntry()}); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 3fbb93795075..cbb1a74049fb 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -305,11 +305,7 @@ namespace { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - /// - /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG - /// SrcReg. This introduces an implicit-def of DstReg on coalesced users. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, - bool IsSubregToReg); + void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1347,7 +1343,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, if (DstReg.isPhysical()) { Register NewDstReg = DstReg; - unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx); + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), + DefMI->getOperand(0).getSubReg()); if (NewDstIdx) NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); @@ -1496,7 +1493,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MRI->setRegClass(DstReg, NewRC); // Update machine operands and add flags. - updateRegDefsUses(DstReg, DstReg, DstIdx, false); + updateRegDefsUses(DstReg, DstReg, DstIdx); NewMI.getOperand(0).setSubReg(NewIdx); // updateRegDefUses can add an "undef" flag to the definition, since // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make @@ -1816,7 +1813,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx, bool IsSubregToReg) { + unsigned SubIdx) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1856,8 +1853,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); - bool FullDef = true; - // Replace SrcReg with DstReg in all UseMI operands. 
for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); @@ -1865,13 +1860,9 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // Adjust <undef> flags in case of sub-register joins. We don't want to // turn a full def into a read-modify-write sub-register def and vice // versa. - if (SubIdx && MO.isDef()) { + if (SubIdx && MO.isDef()) MO.setIsUndef(!Reads); - if (!Reads) - FullDef = false; - } - // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. if (MO.isUse() && !DstIsPhys) { @@ -1903,25 +1894,6 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, MO.substVirtReg(DstReg, SubIdx, *TRI); } - if (IsSubregToReg && !FullDef) { - // If the coalesed instruction doesn't fully define the register, we need - // to preserve the original super register liveness for SUBREG_TO_REG. - // - // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, - // but it introduces liveness for other subregisters. Downstream users may - // have been relying on those bits, so we need to ensure their liveness is - // captured with a def of other lanes. - - // FIXME: Need to add new subrange if tracking subranges. We could also - // skip adding this if we knew the other lanes are dead, and only for - // other lanes. - - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "this should update subranges"); - MachineInstrBuilder MIB(*MF, UseMI); - MIB.addReg(DstReg, RegState::ImplicitDefine); - } - LLVM_DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugInstr()) @@ -2121,8 +2093,6 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); } - const bool IsSubregToReg = CopyMI->isSubregToReg(); - ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2190,12 +2160,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) { - assert(!IsSubregToReg && "can this happen?"); - updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false); - } - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), - IsSubregToReg); + if (CP.getDstIdx()) + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // Shrink subregister ranges if necessary. if (ShrinkMask.any()) { diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 893aa4a91828..56025aa5c45f 100644 --- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// // -// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics -// with vector operands) with matching calls to functions from a vector -// library (e.g., libmvec, SVML) according to TargetLibraryInfo. +// Replaces LLVM IR instructions with vector operands (i.e., the frem +// instruction or calls to LLVM intrinsics) with matching calls to functions +// from a vector library (e.g libmvec, SVML) using TargetLibraryInfo interface. 
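The rewritten header reflects that the pass now matches the vector frem instruction in addition to intrinsic calls. A vector frem is lane-wise fmod on the scalar element type, which is why the code below can ask TLI.getLibFunc(Instruction::FRem, ScalarTy, ...) for the scalar libm name and then look up a vector variant of it. A rough standalone illustration of the rewrite being performed (vec_fmodf is a made-up stand-in for whatever routine the vector library actually provides):

#include <array>
#include <cmath>

using Vec4f = std::array<float, 4>;

// Hypothetical 4-lane vector-library routine; a real mapping would name a
// libmvec/SVML/SLEEF symbol taken from TargetLibraryInfo.
Vec4f vec_fmodf(Vec4f A, Vec4f B) {
  Vec4f R;
  for (int I = 0; I < 4; ++I)
    R[I] = std::fmod(A[I], B[I]);   // frem on <4 x float> is lane-wise fmodf
  return R;
}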
// //===----------------------------------------------------------------------===// @@ -69,88 +69,98 @@ Function *getTLIFunction(Module *M, FunctionType *VectorFTy, return TLIFunc; } -/// Replace the call to the vector intrinsic ( \p CalltoReplace ) with a call to -/// the corresponding function from the vector library ( \p TLIVecFunc ). -static void replaceWithTLIFunction(CallInst &CalltoReplace, VFInfo &Info, +/// Replace the instruction \p I with a call to the corresponding function from +/// the vector library (\p TLIVecFunc). +static void replaceWithTLIFunction(Instruction &I, VFInfo &Info, Function *TLIVecFunc) { - IRBuilder<> IRBuilder(&CalltoReplace); - SmallVector<Value *> Args(CalltoReplace.args()); + IRBuilder<> IRBuilder(&I); + auto *CI = dyn_cast<CallInst>(&I); + SmallVector<Value *> Args(CI ? CI->args() : I.operands()); if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) { - auto *MaskTy = VectorType::get(Type::getInt1Ty(CalltoReplace.getContext()), - Info.Shape.VF); + auto *MaskTy = + VectorType::get(Type::getInt1Ty(I.getContext()), Info.Shape.VF); Args.insert(Args.begin() + OptMaskpos.value(), Constant::getAllOnesValue(MaskTy)); } - // Preserve the operand bundles. + // If it is a call instruction, preserve the operand bundles. SmallVector<OperandBundleDef, 1> OpBundles; - CalltoReplace.getOperandBundlesAsDefs(OpBundles); - CallInst *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles); - CalltoReplace.replaceAllUsesWith(Replacement); + if (CI) + CI->getOperandBundlesAsDefs(OpBundles); + + auto *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles); + I.replaceAllUsesWith(Replacement); // Preserve fast math flags for FP math. if (isa<FPMathOperator>(Replacement)) - Replacement->copyFastMathFlags(&CalltoReplace); + Replacement->copyFastMathFlags(&I); } -/// Returns true when successfully replaced \p CallToReplace with a suitable -/// function taking vector arguments, based on available mappings in the \p TLI. -/// Currently only works when \p CallToReplace is a call to vectorized -/// intrinsic. +/// Returns true when successfully replaced \p I with a suitable function taking +/// vector arguments, based on available mappings in the \p TLI. Currently only +/// works when \p I is a call to vectorized intrinsic or the frem instruction. static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, - CallInst &CallToReplace) { - if (!CallToReplace.getCalledFunction()) - return false; + Instruction &I) { + // At the moment VFABI assumes the return type is always widened unless it is + // a void type. + auto *VTy = dyn_cast<VectorType>(I.getType()); + ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0)); - auto IntrinsicID = CallToReplace.getCalledFunction()->getIntrinsicID(); - // Replacement is only performed for intrinsic functions. - if (IntrinsicID == Intrinsic::not_intrinsic) - return false; - - // Compute arguments types of the corresponding scalar call. Additionally - // checks if in the vector call, all vector operands have the same EC. - ElementCount VF = ElementCount::getFixed(0); - SmallVector<Type *> ScalarArgTypes; - for (auto Arg : enumerate(CallToReplace.args())) { - auto *ArgTy = Arg.value()->getType(); - if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) { - ScalarArgTypes.push_back(ArgTy); - } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) { - ScalarArgTypes.push_back(ArgTy->getScalarType()); - // Disallow vector arguments with different VFs. 
When processing the first - // vector argument, store it's VF, and for the rest ensure that they match - // it. - if (VF.isZero()) - VF = VectorArgTy->getElementCount(); - else if (VF != VectorArgTy->getElementCount()) + // Compute the argument types of the corresponding scalar call and the scalar + // function name. For calls, it additionally finds the function to replace + // and checks that all vector operands match the previously found EC. + SmallVector<Type *, 8> ScalarArgTypes; + std::string ScalarName; + Function *FuncToReplace = nullptr; + if (auto *CI = dyn_cast<CallInst>(&I)) { + FuncToReplace = CI->getCalledFunction(); + Intrinsic::ID IID = FuncToReplace->getIntrinsicID(); + assert(IID != Intrinsic::not_intrinsic && "Not an intrinsic"); + for (auto Arg : enumerate(CI->args())) { + auto *ArgTy = Arg.value()->getType(); + if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) { + ScalarArgTypes.push_back(ArgTy); + } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) { + ScalarArgTypes.push_back(VectorArgTy->getElementType()); + // When return type is void, set EC to the first vector argument, and + // disallow vector arguments with different ECs. + if (EC.isZero()) + EC = VectorArgTy->getElementCount(); + else if (EC != VectorArgTy->getElementCount()) + return false; + } else + // Exit when it is supposed to be a vector argument but it isn't. return false; - } else - // Exit when it is supposed to be a vector argument but it isn't. + } + // Try to reconstruct the name for the scalar version of the instruction, + // using scalar argument types. + ScalarName = Intrinsic::isOverloaded(IID) + ? Intrinsic::getName(IID, ScalarArgTypes, I.getModule()) + : Intrinsic::getName(IID).str(); + } else { + assert(VTy && "Return type must be a vector"); + auto *ScalarTy = VTy->getScalarType(); + LibFunc Func; + if (!TLI.getLibFunc(I.getOpcode(), ScalarTy, Func)) return false; + ScalarName = TLI.getName(Func); + ScalarArgTypes = {ScalarTy, ScalarTy}; } - // Try to reconstruct the name for the scalar version of this intrinsic using - // the intrinsic ID and the argument types converted to scalar above. - std::string ScalarName = - (Intrinsic::isOverloaded(IntrinsicID) - ? Intrinsic::getName(IntrinsicID, ScalarArgTypes, - CallToReplace.getModule()) - : Intrinsic::getName(IntrinsicID).str()); - // Try to find the mapping for the scalar version of this intrinsic and the // exact vector width of the call operands in the TargetLibraryInfo. First, // check with a non-masked variant, and if that fails try with a masked one. const VecDesc *VD = - TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ false); - if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ true))) + TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ false); + if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ true))) return false; LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName - << "` and vector width " << VF << " to: `" + << "` and vector width " << EC << " to: `" << VD->getVectorFnName() << "`.\n"); // Replace the call to the intrinsic with a call to the vector library // function. 
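Once the scalar name and the element count are known, the lookup order matters: a non-masked vector variant is preferred, and a masked one is only used as a fallback (the all-true mask is synthesized later in replaceWithTLIFunction). A small standalone sketch of that fallback, with a made-up mapping table standing in for TargetLibraryInfo:

#include <string>
#include <vector>

struct VecMapping {
  std::string Scalar;
  unsigned VF;
  bool Masked;
  std::string VectorName;   // illustrative VFABI-style names below
};

static const std::vector<VecMapping> Table = {
    {"sinf", 4, false, "_ZGVnN4v_sinf"},
    {"sinf", 4, true, "_ZGVnM4v_sinf"},
};

const VecMapping *findMapping(const std::string &S, unsigned VF, bool Masked) {
  for (const VecMapping &M : Table)
    if (M.Scalar == S && M.VF == VF && M.Masked == Masked)
      return &M;
  return nullptr;
}

// Mirrors the lookup above: try the unmasked variant first, then the masked one.
const VecMapping *findVectorVariant(const std::string &S, unsigned VF) {
  if (const VecMapping *M = findMapping(S, VF, /*Masked=*/false))
    return M;
  return findMapping(S, VF, /*Masked=*/true);
}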
- Type *ScalarRetTy = CallToReplace.getType()->getScalarType(); + Type *ScalarRetTy = I.getType()->getScalarType(); FunctionType *ScalarFTy = FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false); const std::string MangledName = VD->getVectorFunctionABIVariantString(); @@ -162,27 +172,37 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, if (!VectorFTy) return false; - Function *FuncToReplace = CallToReplace.getCalledFunction(); - Function *TLIFunc = getTLIFunction(CallToReplace.getModule(), VectorFTy, + Function *TLIFunc = getTLIFunction(I.getModule(), VectorFTy, VD->getVectorFnName(), FuncToReplace); - replaceWithTLIFunction(CallToReplace, *OptInfo, TLIFunc); - - LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" - << FuncToReplace->getName() << "` with call to `" - << TLIFunc->getName() << "`.\n"); + replaceWithTLIFunction(I, *OptInfo, TLIFunc); + LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName + << "` with call to `" << TLIFunc->getName() << "`.\n"); ++NumCallsReplaced; return true; } +/// Supported instruction \p I must be a vectorized frem or a call to an +/// intrinsic that returns either void or a vector. +static bool isSupportedInstruction(Instruction *I) { + Type *Ty = I->getType(); + if (auto *CI = dyn_cast<CallInst>(I)) + return (Ty->isVectorTy() || Ty->isVoidTy()) && CI->getCalledFunction() && + CI->getCalledFunction()->getIntrinsicID() != + Intrinsic::not_intrinsic; + if (I->getOpcode() == Instruction::FRem && Ty->isVectorTy()) + return true; + return false; +} + static bool runImpl(const TargetLibraryInfo &TLI, Function &F) { bool Changed = false; - SmallVector<CallInst *> ReplacedCalls; + SmallVector<Instruction *> ReplacedCalls; for (auto &I : instructions(F)) { - if (auto *CI = dyn_cast<CallInst>(&I)) { - if (replaceWithCallToVeclib(TLI, *CI)) { - ReplacedCalls.push_back(CI); - Changed = true; - } + if (!isSupportedInstruction(&I)) + continue; + if (replaceWithCallToVeclib(TLI, I)) { + ReplacedCalls.push_back(&I); + Changed = true; } } // Erase the calls to the intrinsics that have been replaced diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eafa95ce7fcf..2327664516cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7987,7 +7987,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // If OR can be rewritten into ADD, try combines based on ADD. if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && - DAG.haveNoCommonBitsSet(N0, N1)) + DAG.isADDLike(SDValue(N, 0))) if (SDValue Combined = visitADDLike(N)) return Combined; @@ -10055,7 +10055,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Shl0.getNode()); - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); + SDNodeFlags Flags; + // Preserve the disjoint flag for Or. + if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint()) + Flags.setDisjoint(true); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags); } } @@ -14709,7 +14713,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { - int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int Elt = EltNo->getAsZExtVal(); int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDLoc DL(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index f3d8edb8926b..6d80b282a1ed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -560,15 +560,13 @@ bool FastISel::selectGetElementPtr(const User *I) { } } } else { - Type *Ty = GTI.getIndexedType(); - // If this is a constant subscript, handle it quickly. if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; // N = N + Offset uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue(); - TotalOffs += DL.getTypeAllocSize(Ty) * IdxN; + TotalOffs += GTI.getSequentialElementStride(DL) * IdxN; if (TotalOffs >= MaxOffs) { N = fastEmit_ri_(VT, ISD::ADD, N, TotalOffs, VT); if (!N) // Unhandled operand. Halt "fast" selection and bail. @@ -585,7 +583,7 @@ bool FastISel::selectGetElementPtr(const User *I) { } // N = N + Idx * ElementSize; - uint64_t ElementSize = DL.getTypeAllocSize(Ty); + uint64_t ElementSize = GTI.getSequentialElementStride(DL); Register IdxN = getRegForGEPIndex(Idx); if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -1182,6 +1180,184 @@ bool FastISel::selectCall(const User *I) { return lowerCall(Call); } +void FastISel::handleDbgInfo(const Instruction *II) { + if (!II->hasDbgValues()) + return; + + // Clear any metadata. + MIMD = MIMetadata(); + + // Reverse order of debug records, because fast-isel walks through backwards. + for (DPValue &DPV : llvm::reverse(II->getDbgValueRange())) { + flushLocalValueMap(); + recomputeInsertPt(); + + Value *V = nullptr; + if (!DPV.hasArgList()) + V = DPV.getVariableLocationOp(0); + + bool Res = false; + if (DPV.getType() == DPValue::LocationType::Value) { + Res = lowerDbgValue(V, DPV.getExpression(), DPV.getVariable(), + DPV.getDebugLoc()); + } else { + assert(DPV.getType() == DPValue::LocationType::Declare); + if (FuncInfo.PreprocessedDPVDeclares.contains(&DPV)) + continue; + Res = lowerDbgDeclare(V, DPV.getExpression(), DPV.getVariable(), + DPV.getDebugLoc()); + } + + if (!Res) + LLVM_DEBUG(dbgs() << "Dropping debug-info for " << DPV << "\n";); + } +} + +bool FastISel::lowerDbgValue(const Value *V, DIExpression *Expr, + DILocalVariable *Var, const DebugLoc &DL) { + // This form of DBG_VALUE is target-independent. + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + if (!V || isa<UndefValue>(V)) { + // DI is either undef or cannot produce a valid DBG_VALUE, so produce an + // undef DBG_VALUE to terminate any prior location. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, false, 0U, Var, Expr); + return true; + } + if (const auto *CI = dyn_cast<ConstantInt>(V)) { + // See if there's an expression to constant-fold. + if (Expr) + std::tie(Expr, CI) = Expr->constantFold(CI); + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addCImm(CI) + .addImm(0U) + .addMetadata(Var) + .addMetadata(Expr); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()) + .addImm(0U) + .addMetadata(Var) + .addMetadata(Expr); + return true; + } + if (const auto *CF = dyn_cast<ConstantFP>(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addFPImm(CF) + .addImm(0U) + .addMetadata(Var) + .addMetadata(Expr); + return true; + } + if (const auto *Arg = dyn_cast<Argument>(V); + Arg && Expr && Expr->isEntryValue()) { + // As per the Verifier, this case is only valid for swift async Args. 
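For an entry-value expression the only registers that can legitimately carry the location are the function's live-in physical registers, so the lowering scans the (physical, virtual) live-in pairs for the argument's register and drops the location if nothing matches. A stripped-down sketch of that search, using plain unsigned register ids:

#include <optional>
#include <utility>
#include <vector>

using Register = unsigned;

// LiveIns stands in for FuncInfo.RegInfo->liveins(): each entry pairs a
// physical register with the virtual register it was copied into on entry.
std::optional<Register>
findEntryValueReg(const std::vector<std::pair<Register, Register>> &LiveIns,
                  Register ArgReg) {
  for (auto [PhysReg, VirtReg] : LiveIns)
    if (ArgReg == VirtReg || ArgReg == PhysReg)
      return PhysReg;      // emit the DBG_VALUE against this physreg
  return std::nullopt;     // no live-in match: the location is dropped
}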
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync)); + + Register Reg = getRegForValue(Arg); + for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins()) + if (Reg == VirtReg || Reg == PhysReg) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, false /*IsIndirect*/, + PhysReg, Var, Expr); + return true; + } + + LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but " + "couldn't find a physical register\n"); + return false; + } + if (auto SI = FuncInfo.StaticAllocaMap.find(dyn_cast<AllocaInst>(V)); + SI != FuncInfo.StaticAllocaMap.end()) { + MachineOperand FrameIndexOp = MachineOperand::CreateFI(SI->second); + bool IsIndirect = false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, FrameIndexOp, + Var, Expr); + return true; + } + if (Register Reg = lookUpRegForValue(V)) { + // FIXME: This does not handle register-indirect values at offset 0. + if (!FuncInfo.MF->useDebugInstrRef()) { + bool IsIndirect = false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect, Reg, Var, + Expr); + return true; + } + // If using instruction referencing, produce this as a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. + SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg( + /* Reg */ Reg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)}); + SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0}); + auto *NewExpr = DIExpression::prependOpcodes(Expr, Ops); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs, + Var, NewExpr); + return true; + } + return false; +} + +bool FastISel::lowerDbgDeclare(const Value *Address, DIExpression *Expr, + DILocalVariable *Var, const DebugLoc &DL) { + if (!Address || isa<UndefValue>(Address)) { + LLVM_DEBUG(dbgs() << "Dropping debug info (bad/undef address)\n"); + return false; + } + + std::optional<MachineOperand> Op; + if (Register Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Op && !Address->use_empty() && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), + false); + + if (Op) { + assert(Var->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { + // If using instruction referencing, produce this as a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto + // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. 
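DBG_INSTR_REF has no separate indirect flag, so when a dbg.declare (which describes an address) is emitted in instruction-referencing mode, the dereference has to be folded into the expression itself: DW_OP_LLVM_arg 0 and DW_OP_deref are prepended in front of the variable's original expression. A standalone sketch of that prefixing, with placeholder opcode values instead of the real constants from llvm/BinaryFormat/Dwarf.h:

#include <cstdint>
#include <vector>

// Placeholder opcode values; the real ones are dwarf::DW_OP_LLVM_arg (an LLVM
// vendor extension) and dwarf::DW_OP_deref (0x06 in standard DWARF).
constexpr uint64_t OpLLVMArg = 0x1000;
constexpr uint64_t OpDeref = 0x06;

// "Refer to machine operand #0, then dereference it", followed by whatever
// expression the variable already had.
std::vector<uint64_t> prependDeclareOps(std::vector<uint64_t> Expr) {
  std::vector<uint64_t> Ops = {OpLLVMArg, 0, OpDeref};
  Ops.insert(Ops.end(), Expr.begin(), Expr.end());
  return Ops;
}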
+ SmallVector<uint64_t, 3> Ops( + {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_deref}); + auto *NewExpr = DIExpression::prependOpcodes(Expr, Ops); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, *Op, + Var, NewExpr); + return true; + } + + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op, Var, + Expr); + return true; + } + + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + LLVM_DEBUG( + dbgs() << "Dropping debug info (no materialized reg for address)\n"); + return false; +} + bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: @@ -1211,153 +1387,28 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; const Value *Address = DI->getAddress(); - if (!Address || isa<UndefValue>(Address)) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI - << " (bad/undef address)\n"); - return true; - } + if (!lowerDbgDeclare(Address, DI->getExpression(), DI->getVariable(), + MIMD.getDL())) + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI); - std::optional<MachineOperand> Op; - if (Register Reg = lookUpRegForValue(Address)) - Op = MachineOperand::CreateReg(Reg, false); - - // If we have a VLA that has a "use" in a metadata node that's then used - // here but it has no other uses, then we have a problem. E.g., - // - // int foo (const int *x) { - // char a[*x]; - // return 0; - // } - // - // If we assign 'a' a vreg and fast isel later on has to use the selection - // DAG isel, it will want to copy the value to the vreg. However, there are - // no uses, which goes counter to what selection DAG isel expects. - if (!Op && !Address->use_empty() && isa<Instruction>(Address) && - (!isa<AllocaInst>(Address) || - !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) - Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), - false); - - if (Op) { - assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) && - "Expected inlined-at fields to agree"); - if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { - // If using instruction referencing, produce this as a DBG_INSTR_REF, - // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto - // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. - SmallVector<uint64_t, 3> Ops( - {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_deref}); - auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), - TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, *Op, - DI->getVariable(), NewExpr); - } else { - // A dbg.declare describes the address of a source variable, so lower it - // into an indirect DBG_VALUE. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op, - DI->getVariable(), DI->getExpression()); - } - } else { - // We can't yet handle anything else here because it would require - // generating code, thus altering codegen because of debug info. - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI - << " (no materialized reg for address)\n"); - } return true; } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. 
const DbgValueInst *DI = cast<DbgValueInst>(II); - const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); DIExpression *Expr = DI->getExpression(); DILocalVariable *Var = DI->getVariable(); + if (DI->hasArgList()) + // Signal that we don't have a location for this. + V = nullptr; + assert(Var->isValidLocationForIntrinsic(MIMD.getDL()) && "Expected inlined-at fields to agree"); - if (!V || isa<UndefValue>(V) || DI->hasArgList()) { - // DI is either undef or cannot produce a valid DBG_VALUE, so produce an - // undef DBG_VALUE to terminate any prior location. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U, - Var, Expr); - return true; - } - if (const auto *CI = dyn_cast<ConstantInt>(V)) { - // See if there's an expression to constant-fold. - if (Expr) - std::tie(Expr, CI) = Expr->constantFold(CI); - if (CI->getBitWidth() > 64) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) - .addCImm(CI) - .addImm(0U) - .addMetadata(Var) - .addMetadata(Expr); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) - .addImm(CI->getZExtValue()) - .addImm(0U) - .addMetadata(Var) - .addMetadata(Expr); - return true; - } - if (const auto *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) - .addFPImm(CF) - .addImm(0U) - .addMetadata(Var) - .addMetadata(Expr); - return true; - } - if (const auto *Arg = dyn_cast<Argument>(V); - Arg && Expr && Expr->isEntryValue()) { - // As per the Verifier, this case is only valid for swift async Args. - assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync)); - - Register Reg = getRegForValue(Arg); - for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins()) - if (Reg == VirtReg || Reg == PhysReg) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, - false /*IsIndirect*/, PhysReg, Var, Expr); - return true; - } - LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but " - "couldn't find a physical register\n" - << *DI << "\n"); - return true; - } - if (auto SI = FuncInfo.StaticAllocaMap.find(dyn_cast<AllocaInst>(V)); - SI != FuncInfo.StaticAllocaMap.end()) { - MachineOperand FrameIndexOp = MachineOperand::CreateFI(SI->second); - bool IsIndirect = false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect, - FrameIndexOp, Var, Expr); - return true; - } - if (Register Reg = lookUpRegForValue(V)) { - // FIXME: This does not handle register-indirect values at offset 0. - if (!FuncInfo.MF->useDebugInstrRef()) { - bool IsIndirect = false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect, - Reg, Var, Expr); - return true; - } - // If using instruction referencing, produce this as a DBG_INSTR_REF, - // to be later patched up by finalizeDebugInstrRefs. - SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg( - /* Reg */ Reg, /* isDef */ false, /* isImp */ false, - /* isKill */ false, /* isDead */ false, - /* isUndef */ false, /* isEarlyClobber */ false, - /* SubReg */ 0, /* isDebug */ true)}); - SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0}); - auto *NewExpr = DIExpression::prependOpcodes(Expr, Ops); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), - TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs, - Var, NewExpr); - return true; - } - // We don't know how to handle other cases, so we drop. 
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + if (!lowerDbgValue(V, Expr, Var, MIMD.getDL())) + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; } case Intrinsic::dbg_label: { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 34fa1f5a7ed1..032cff416cda 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -551,7 +551,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); - unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + unsigned SubIdx = N2->getAsZExtVal(); // Figure out the register class to create for the destreg. It should be // the largest legal register class supporting SubIdx sub-registers. @@ -650,7 +650,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. if (!R || !R->getReg().isPhysical()) { - unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubIdx = Op->getAsZExtVal(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 4e317062cec4..296ed3a3c3dc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3199,7 +3199,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { return true; } break; - case ISD::FP_ROUND: + case ISD::FP_ROUND: { + EVT VT = Node->getValueType(0); + if (VT.getScalarType() == MVT::bf16) { + Results.push_back( + DAG.getNode(ISD::FP_TO_BF16, SDLoc(Node), VT, Node->getOperand(0))); + break; + } + + LLVM_FALLTHROUGH; + } case ISD::BITCAST: if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl))) @@ -3226,12 +3235,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { return true; } break; - case ISD::FP_EXTEND: - if ((Tmp1 = EmitStackConvert(Node->getOperand(0), - Node->getOperand(0).getValueType(), - Node->getValueType(0), dl))) + case ISD::FP_EXTEND: { + SDValue Op = Node->getOperand(0); + EVT SrcVT = Op.getValueType(); + EVT DstVT = Node->getValueType(0); + if (SrcVT.getScalarType() == MVT::bf16) { + Results.push_back(DAG.getNode(ISD::BF16_TO_FP, SDLoc(Node), DstVT, Op)); + break; + } + + if ((Tmp1 = EmitStackConvert(Op, SrcVT, DstVT, dl))) Results.push_back(Tmp1); break; + } case ISD::BF16_TO_FP: { // Always expand bf16 to f32 casts, they lower to ext + shift. 
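The comment's claim about bf16 casts follows from the encoding: bf16 is exactly the upper half of an IEEE-754 binary32, so extending is an integer widen plus a 16-bit shift, and a truncating round is the reverse. A self-contained bit-level illustration:

#include <cstdint>
#include <cstring>

// bf16 -> f32: place the 16 payload bits in the high half of a 32-bit pattern.
float bf16ToFloat(uint16_t B) {
  uint32_t Bits = static_cast<uint32_t>(B) << 16;
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

// f32 -> bf16 by simply dropping the low 16 bits (round-toward-zero); real
// lowerings normally apply round-to-nearest-even before truncating.
uint16_t floatToBF16Truncate(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return static_cast<uint16_t>(Bits >> 16);
}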
// diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 65919a64b806..589fec0e56f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2181,6 +2181,24 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { report_fatal_error("Attempt at an invalid promotion-related conversion"); } +static ISD::NodeType GetPromotionOpcodeStrict(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f16) + return ISD::STRICT_FP16_TO_FP; + + if (RetVT == MVT::f16) + return ISD::STRICT_FP_TO_FP16; + + if (OpVT == MVT::bf16) { + // TODO: return ISD::STRICT_BF16_TO_FP; + } + + if (RetVT == MVT::bf16) { + // TODO: return ISD::STRICT_FP_TO_BF16; + } + + report_fatal_error("Attempt at an invalid promotion-related conversion"); +} + bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG)); SDValue R = SDValue(); @@ -2281,7 +2299,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo) { - assert(OpNo == 1); + assert(OpNo == 1 && "Promoting unpromotable operand"); SDValue Op = GetPromotedFloat(N->getOperand(1)); EVT VT = N->getValueType(0); @@ -2416,6 +2434,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break; case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; + case ISD::STRICT_FP_ROUND: + R = PromoteFloatRes_STRICT_FP_ROUND(N); + break; case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; @@ -2490,7 +2511,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { EVT VecVT = Vec->getValueType(0); EVT EltVT = VecVT.getVectorElementType(); - uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + uint64_t IdxVal = Idx->getAsZExtVal(); switch (getTypeAction(VecVT)) { default: break; @@ -2621,6 +2642,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round); } +// Explicit operation to reduce precision. Reduce the value to half precision +// and promote it back to the legal type. 
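The strict variant keeps the computation in the promoted (legal, wider) type but must still hand back a value carrying only the precision of the requested narrow type, so it rounds down to that precision and immediately converts back, threading the chain through both nodes. The same round-then-rewiden idea as an analogy, with double standing in for the promoted type and float for the narrow one:

#include <cstdio>

// Narrow to the target precision, then widen again and keep working in the
// wide type - numerically this is what promote-float legalization has to
// preserve for fp_round.
double roundToFloatPrecision(double X) {
  return static_cast<double>(static_cast<float>(X));
}

int main() {
  double X = 1.0000000001;                              // not a float value
  std::printf("%.12f\n", roundToFloatPrecision(X));     // prints 1.000000000000
  return 0;
}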
+SDValue DAGTypeLegalizer::PromoteFloatRes_STRICT_FP_ROUND(SDNode *N) { + SDLoc DL(N); + + SDValue Chain = N->getOperand(0); + SDValue Op = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT OpVT = Op->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + // Round promoted float to desired precision + SDValue Round = DAG.getNode(GetPromotionOpcodeStrict(OpVT, VT), DL, + DAG.getVTList(IVT, MVT::Other), Chain, Op); + // Promote it back to the legal output type + SDValue Res = + DAG.getNode(GetPromotionOpcodeStrict(VT, NVT), DL, + DAG.getVTList(NVT, MVT::Other), Round.getValue(1), Round); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 3d21bd22e6ef..814f746f5a4d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -165,7 +165,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16_BF16(N); break; - + case ISD::STRICT_FP_TO_FP16: + Res = PromoteIntRes_STRICT_FP_TO_FP16_BF16(N); + break; case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break; case ISD::AND: @@ -787,6 +789,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::PromoteIntRes_STRICT_FP_TO_FP16_BF16(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other), + N->getOperand(0), N->getOperand(1)); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::PromoteIntRes_XRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -1804,6 +1816,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::FP16_TO_FP: case ISD::VP_UINT_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::STRICT_FP16_TO_FP: case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; case ISD::VP_ZERO_EXTEND: Res = PromoteIntOp_VP_ZERO_EXTEND(N); break; @@ -5557,7 +5570,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { getTypeAction(InVT) == TargetLowering::TypeLegal) { EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext()); unsigned NElts = NInVT.getVectorMinNumElements(); - uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue(); + uint64_t IdxVal = BaseIdx->getAsZExtVal(); SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0, DAG.getConstant(alignDown(IdxVal, NElts), dl, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 84b1b2c71fd0..09f0bca8b861 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -326,6 +326,7 @@ private: SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N); SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N); 
+ SDValue PromoteIntRes_STRICT_FP_TO_FP16_BF16(SDNode *N); SDValue PromoteIntRes_XRINT(SDNode *N); SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); @@ -699,6 +700,7 @@ private: SDValue PromoteFloatRes_ExpOp(SDNode *N); SDValue PromoteFloatRes_FFREXP(SDNode *N); SDValue PromoteFloatRes_FP_ROUND(SDNode *N); + SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_LOAD(SDNode *N); SDValue PromoteFloatRes_SELECT(SDNode *N); SDValue PromoteFloatRes_SELECT_CC(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 66461b26468f..ec74d2940099 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1442,7 +1442,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); - uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + uint64_t IdxVal = Idx->getAsZExtVal(); Hi = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl)); @@ -1466,7 +1466,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // If we know the index is in the first half, and we know the subvector // doesn't cross the boundary between the halves, we can avoid spilling the // vector, and insert into the lower half of the split vector directly. - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + unsigned IdxVal = Idx->getAsZExtVal(); if (IdxVal + SubElems <= LoElems) { Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); return; @@ -3279,7 +3279,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, SDValue Lo, Hi; GetSplitVector(SubVec, Lo, Hi); - uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + uint64_t IdxVal = Idx->getAsZExtVal(); uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); SDValue FirstInsertion = @@ -3301,7 +3301,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { GetSplitVector(N->getOperand(0), Lo, Hi); uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements(); - uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + uint64_t IdxVal = Idx->getAsZExtVal(); if (IdxVal < LoEltsMin) { assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin && @@ -5257,7 +5257,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT InVT = InOp.getValueType(); // Check if we can just return the input vector after widening. 
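Most of these hunks are the mechanical getAsZExtVal() cleanup, but the surrounding split-vector logic is worth spelling out: once a vector has been split into Lo/Hi halves, a subvector extract either comes entirely from Lo at the original index or from Hi at a rebased index, and it must not straddle the boundary. A stripped-down sketch of that decision:

#include <cassert>
#include <cstdint>

struct SubvectorSource {
  bool FromHi;
  uint64_t AdjustedIdx;   // index into the chosen half
};

// Mirrors the index handling in SplitVecOp_EXTRACT_SUBVECTOR: LoElems is the
// element count of the low half, SubElems the width of the extract.
SubvectorSource pickHalf(uint64_t IdxVal, uint64_t SubElems, uint64_t LoElems) {
  if (IdxVal < LoElems) {
    assert(IdxVal + SubElems <= LoElems && "extract must not straddle halves");
    return {false, IdxVal};
  }
  return {true, IdxVal - LoElems};
}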
- uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + uint64_t IdxVal = Idx->getAsZExtVal(); if (IdxVal == 0 && InVT == WidenVT) return InOp; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0e17bba2398e..b39be64c06f9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5022,7 +5022,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::CONCAT_VECTORS: case ISD::INSERT_SUBVECTOR: case ISD::AND: - case ISD::OR: case ISD::XOR: case ISD::ROTL: case ISD::ROTR: @@ -5062,6 +5061,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()); + // Matches hasPoisonGeneratingFlags(). + case ISD::OR: + return ConsiderFlags && Op->getFlags().hasDisjoint(); + case ISD::INSERT_VECTOR_ELT:{ // Ensure that the element index is in bounds. EVT VecVT = Op.getOperand(0).getValueType(); @@ -5085,7 +5088,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool SelectionDAG::isADDLike(SDValue Op) const { unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::OR) - return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); + return Op->getFlags().hasDisjoint() || + haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); if (Opcode == ISD::XOR) return isMinSignedConstant(Op.getOperand(1)); return false; @@ -7193,8 +7197,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(isa<ConstantSDNode>(N3) && "Insert subvector index must be constant"); assert((VT.isScalableVector() != N2VT.isScalableVector() || - (N2VT.getVectorMinNumElements() + - cast<ConstantSDNode>(N3)->getZExtValue()) <= + (N2VT.getVectorMinNumElements() + N3->getAsZExtVal()) <= VT.getVectorMinNumElements()) && "Insert subvector overflow!"); assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() == @@ -9982,8 +9985,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, Ops[1].getValueType().isFloatingPoint() && VTList.VTs[0].bitsLT(Ops[1].getValueType()) && isa<ConstantSDNode>(Ops[2]) && - (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 || - cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) && + (Ops[2]->getAsZExtVal() == 0 || Ops[2]->getAsZExtVal() == 1) && "Invalid STRICT_FP_ROUND!"); break; #if 0 diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 39a1e09e83c5..66825d845c19 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -38,15 +38,18 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other, return true; // Match GlobalAddresses - if (auto *A = dyn_cast<GlobalAddressSDNode>(Base)) + if (auto *A = dyn_cast<GlobalAddressSDNode>(Base)) { if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base)) if (A->getGlobal() == B->getGlobal()) { Off += B->getOffset() - A->getOffset(); return true; } + return false; + } + // Match Constants - if (auto *A = dyn_cast<ConstantPoolSDNode>(Base)) + if (auto *A = dyn_cast<ConstantPoolSDNode>(Base)) { if (auto *B = dyn_cast<ConstantPoolSDNode>(Other.Base)) { bool IsMatch = A->isMachineConstantPoolEntry() == B->isMachineConstantPoolEntry(); @@ -62,7 +65,8 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset 
&Other, } } - const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + return false; + } // Match FrameIndexes. if (auto *A = dyn_cast<FrameIndexSDNode>(Base)) @@ -73,6 +77,7 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other, // Non-equal FrameIndexes - If both frame indices are fixed // we know their relative offsets and can compare them. Otherwise // we must be conservative. + const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); if (MFI.isFixedObjectIndex(A->getIndex()) && MFI.isFixedObjectIndex(B->getIndex())) { Off += MFI.getObjectOffset(B->getIndex()) - @@ -81,6 +86,7 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other, } } } + return false; } @@ -91,10 +97,13 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, const SelectionDAG &DAG, bool &IsAlias) { BaseIndexOffset BasePtr0 = match(Op0, DAG); - BaseIndexOffset BasePtr1 = match(Op1, DAG); + if (!BasePtr0.getBase().getNode()) + return false; - if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode())) + BaseIndexOffset BasePtr1 = match(Op1, DAG); + if (!BasePtr1.getBase().getNode()) return false; + int64_t PtrDiff; if (NumBytes0 && NumBytes1 && BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3c4b285cb067..2c477b947430 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3354,6 +3354,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { } if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) Flags.setExact(ExactOp->isExact()); + if (auto *DisjointOp = dyn_cast<PossiblyDisjointInst>(&I)) + Flags.setDisjoint(DisjointOp->isDisjoint()); if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) Flags.copyFMF(*FPOp); @@ -4112,7 +4114,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); MVT IdxTy = MVT::getIntegerVT(IdxSize); TypeSize ElementSize = - DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType()); + GTI.getSequentialElementStride(DAG.getDataLayout()); // We intentionally mask away the high bits here; ElementSize may not // fit in IdxTy. APInt ElementMul(IdxSize, ElementSize.getKnownMinValue()); @@ -5642,7 +5644,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, // expansion/promotion) if it was possible to expand a libcall of an // illegal type during operation legalization. But it's not, so things // get a bit hacky. - unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue(); + unsigned ScaleInt = Scale->getAsZExtVal(); if ((ScaleInt > 0 || (Saturating && Signed)) && (TLI.isTypeLegal(VT) || (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) { @@ -7655,8 +7657,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // suitable for the target. Convert the index as required. MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) - Index = DAG.getVectorIdxConstant( - cast<ConstantSDNode>(Index)->getZExtValue(), sdl); + Index = DAG.getVectorIdxConstant(Index->getAsZExtVal(), sdl); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec, @@ -7672,8 +7673,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // suitable for the target. 
Convert the index as required. MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) - Index = DAG.getVectorIdxConstant( - cast<ConstantSDNode>(Index)->getZExtValue(), sdl); + Index = DAG.getVectorIdxConstant(Index->getAsZExtVal(), sdl); setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index)); @@ -8136,7 +8136,7 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( case ISD::VP_IS_FPCLASS: { const DataLayout DLayout = DAG.getDataLayout(); EVT DestVT = TLI.getValueType(DLayout, VPIntrin.getType()); - auto Constant = cast<ConstantSDNode>(OpValues[1])->getZExtValue(); + auto Constant = OpValues[1]->getAsZExtVal(); SDValue Check = DAG.getTargetConstant(Constant, DL, MVT::i32); SDValue V = DAG.getNode(ISD::VP_IS_FPCLASS, DL, DestVT, {OpValues[0], Check, OpValues[2], OpValues[3]}); @@ -9173,8 +9173,7 @@ findMatchingInlineAsmOperand(unsigned OperandNo, unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. - unsigned OpFlag = - cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + unsigned OpFlag = AsmNodeOperands[CurOp]->getAsZExtVal(); const InlineAsm::Flag F(OpFlag); assert( (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) && @@ -9480,8 +9479,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // just use its register. auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), AsmNodeOperands); - InlineAsm::Flag Flag( - cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue()); + InlineAsm::Flag Flag(AsmNodeOperands[CurOp]->getAsZExtVal()); if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c @@ -9985,14 +9983,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // constant nodes. SDValue ID = getValue(CI.getArgOperand(0)); assert(ID.getValueType() == MVT::i64); - SDValue IDConst = DAG.getTargetConstant( - cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType()); + SDValue IDConst = + DAG.getTargetConstant(ID->getAsZExtVal(), DL, ID.getValueType()); Ops.push_back(IDConst); SDValue Shad = getValue(CI.getArgOperand(1)); assert(Shad.getValueType() == MVT::i32); - SDValue ShadConst = DAG.getTargetConstant( - cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType()); + SDValue ShadConst = + DAG.getTargetConstant(Shad->getAsZExtVal(), DL, Shad.getValueType()); Ops.push_back(ShadConst); // Add the live variables. @@ -10041,7 +10039,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // Get the real number of arguments participating in the call <numArgs> SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos)); - unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); + unsigned NumArgs = NArgVal->getAsZExtVal(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> // Intrinsics include all meta-operands up to but not including CC. @@ -10088,12 +10086,9 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // Add the <id> and <numBytes> constants. 
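For orientation, the meta-arguments that visitPatchpoint peels off before the real call arguments sit at fixed positions in the intrinsic's operand list; the enum below only restates the documented llvm.experimental.patchpoint layout that the PatchPointOpers indices used just below refer to, it is not new API.

// Leading operands of llvm.experimental.patchpoint, as consumed above.
enum PatchPointMetaArg : unsigned {
  IDPos = 0,      // i64 patchpoint id, emitted as a target constant
  NBytesPos = 1,  // i32 number of bytes to reserve for later patching
  TargetPos = 2,  // the callee to lower the call to
  NArgPos = 3,    // i32 count of real call arguments that follow the meta args
};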
SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); + Ops.push_back(DAG.getTargetConstant(IDVal->getAsZExtVal(), dl, MVT::i64)); SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos)); - Ops.push_back(DAG.getTargetConstant( - cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, - MVT::i32)); + Ops.push_back(DAG.getTargetConstant(NBytesVal->getAsZExtVal(), dl, MVT::i32)); // Add the callee. Ops.push_back(Callee); @@ -11637,92 +11632,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } -unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, - CaseClusterIt First, - CaseClusterIt Last) { - return std::count_if(First, Last + 1, [&](const CaseCluster &X) { - if (X.Prob != CC.Prob) - return X.Prob > CC.Prob; - - // Ties are broken by comparing the case value. - return X.Low->getValue().slt(CC.Low->getValue()); - }); -} - void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, MachineBasicBlock *SwitchMBB) { assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && "Clusters not sorted?"); - assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); - // Balance the tree based on branch probabilities to create a near-optimal (in - // terms of search time given key frequency) binary search tree. See e.g. Kurt - // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). - CaseClusterIt LastLeft = W.FirstCluster; - CaseClusterIt FirstRight = W.LastCluster; - auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; - auto RightProb = FirstRight->Prob + W.DefaultProb / 2; - - // Move LastLeft and FirstRight towards each other from opposite directions to - // find a partitioning of the clusters which balances the probability on both - // sides. If LeftProb and RightProb are equal, alternate which side is - // taken to ensure 0-probability nodes are distributed evenly. - unsigned I = 0; - while (LastLeft + 1 < FirstRight) { - if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) - LeftProb += (++LastLeft)->Prob; - else - RightProb += (--FirstRight)->Prob; - I++; - } - - while (true) { - // Our binary search tree differs from a typical BST in that ours can have up - // to three values in each leaf. The pivot selection above doesn't take that - // into account, which means the tree might require more nodes and be less - // efficient. We compensate for this here. - - unsigned NumLeft = LastLeft - W.FirstCluster + 1; - unsigned NumRight = W.LastCluster - FirstRight + 1; - - if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { - // If one side has less than 3 clusters, and the other has more than 3, - // consider taking a cluster from the other side. - - if (NumLeft < NumRight) { - // Consider moving the first cluster on the right to the left side. - CaseCluster &CC = *FirstRight; - unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); - unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); - if (LeftSideRank <= RightSideRank) { - // Moving the cluster to the left does not demote it. - ++LastLeft; - ++FirstRight; - continue; - } - } else { - assert(NumRight < NumLeft); - // Consider moving the last element on the left to the right side. 
- CaseCluster &CC = *LastLeft; - unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); - unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); - if (RightSideRank <= LeftSideRank) { - // Moving the cluster to the right does not demot it. - --LastLeft; - --FirstRight; - continue; - } - } - } - break; - } - - assert(LastLeft + 1 == FirstRight); - assert(LastLeft >= W.FirstCluster); - assert(FirstRight <= W.LastCluster); + auto [LastLeft, FirstRight, LeftProb, RightProb] = + SL->computeSplitWorkItemInfo(W); // Use the first element on the right as pivot since we will make less-than // comparisons against it. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 2e102c002c09..6dcb8c816ad0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -200,12 +200,6 @@ private: /// create. unsigned SDNodeOrder; - /// Determine the rank by weight of CC in [First,Last]. If CC has more weight - /// than each cluster in the range, its rank is 0. - unsigned caseClusterRank(const SwitchCG::CaseCluster &CC, - SwitchCG::CaseClusterIt First, - SwitchCG::CaseClusterIt Last); - /// Emit comparison and split W into two subtrees. void splitWorkItem(SwitchCG::SwitchWorkList &WorkList, const SwitchCG::SwitchWorkListItem &W, Value *Cond, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 78cc60084068..9ebef642e423 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -149,7 +149,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; - unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + unsigned IID = getOperand(OpNo)->getAsZExtVal(); if (IID < Intrinsic::num_intrinsics) return Intrinsic::getBaseName((Intrinsic::ID)IID).str(); if (!G) @@ -597,6 +597,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasExact()) OS << " exact"; + if (getFlags().hasDisjoint()) + OS << " disjoint"; + if (getFlags().hasNonNeg()) OS << " nneg"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f28211ac113c..9acfc76d7d5e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1614,6 +1614,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (isFoldedOrDeadInstruction(Inst, *FuncInfo) || ElidedArgCopyInstrs.count(Inst)) { --NumFastIselRemaining; + FastIS->handleDbgInfo(Inst); continue; } @@ -1625,6 +1626,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FastIS->selectInstruction(Inst)) { --NumFastIselRemaining; ++NumFastIselSuccess; + + FastIS->handleDbgInfo(Inst); // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. // Try to fold the load if so. @@ -1640,6 +1643,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // If we succeeded, don't re-select the load. 
LLVM_DEBUG(dbgs() << "FastISel folded load: " << *BeforeInst << "\n"); + FastIS->handleDbgInfo(BeforeInst); BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; @@ -2121,7 +2125,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, --e; // Don't process a glue operand if it is here. while (i != e) { - InlineAsm::Flag Flags(cast<ConstantSDNode>(InOps[i])->getZExtValue()); + InlineAsm::Flag Flags(InOps[i]->getAsZExtVal()); if (!Flags.isMemKind() && !Flags.isFuncKind()) { // Just skip over this operand, copying the operands verbatim. Ops.insert(Ops.end(), InOps.begin() + i, @@ -2135,12 +2139,10 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, if (Flags.isUseOperandTiedToDef(TiedToOperand)) { // We need the constraint ID from the operand this is tied to. unsigned CurOp = InlineAsm::Op_FirstOperand; - Flags = - InlineAsm::Flag(cast<ConstantSDNode>(InOps[CurOp])->getZExtValue()); + Flags = InlineAsm::Flag(InOps[CurOp]->getAsZExtVal()); for (; TiedToOperand; --TiedToOperand) { CurOp += Flags.getNumOperandRegisters() + 1; - Flags = InlineAsm::Flag( - cast<ConstantSDNode>(InOps[CurOp])->getZExtValue()); + Flags = InlineAsm::Flag(InOps[CurOp]->getAsZExtVal()); } } @@ -2380,9 +2382,8 @@ void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, if (OpNode->getOpcode() == ISD::Constant) { Ops.push_back( CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - Ops.push_back( - CurDAG->getTargetConstant(cast<ConstantSDNode>(OpNode)->getZExtValue(), - DL, OpVal.getValueType())); + Ops.push_back(CurDAG->getTargetConstant(OpNode->getAsZExtVal(), DL, + OpVal.getValueType())); } else { Ops.push_back(OpVal); } @@ -2452,7 +2453,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { Ops.push_back(*It++); // Push the args for the call. - for (uint64_t I = cast<ConstantSDNode>(NumArgs)->getZExtValue(); I != 0; I--) + for (uint64_t I = NumArgs->getAsZExtVal(); I != 0; I--) Ops.push_back(*It++); // Now push the live variables. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c5977546828f..e3e3e375d6a6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1064,10 +1064,9 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); SDValue ResultAVG = - DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA), - DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB)); - return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, - ResultAVG); + DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT), + DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT)); + return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT); } /// Look at Op. 
At this point, we know that only the OriginalDemandedBits of the @@ -1468,14 +1467,24 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::OR: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - + SDNodeFlags Flags = Op.getNode()->getFlags(); if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, - Depth + 1)) + Depth + 1)) { + if (Flags.hasDisjoint()) { + Flags.setDisjoint(false); + Op->setFlags(Flags); + } return true; + } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, - Known2, TLO, Depth + 1)) + Known2, TLO, Depth + 1)) { + if (Flags.hasDisjoint()) { + Flags.setDisjoint(false); + Op->setFlags(Flags); + } return true; + } assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. @@ -1636,11 +1645,11 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO, - Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO, - Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts, + Known2, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); @@ -1666,11 +1675,11 @@ bool TargetLowering::SimplifyDemandedBits( Known = Known.intersectWith(Known2); break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, - Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO, - Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts, + Known2, TLO, Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); @@ -2435,6 +2444,13 @@ bool TargetLowering::SimplifyDemandedBits( unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1; bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG; + APInt InDemandedElts = DemandedElts.zext(InElts); + APInt InDemandedBits = DemandedBits.trunc(InBits); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + InDemandedBits.setBit(InBits - 1); + // If none of the top bits are demanded, convert this into an any_extend. if (DemandedBits.getActiveBits() <= InBits) { // If we only need the non-extended bits of the bottom element @@ -2443,19 +2459,17 @@ bool TargetLowering::SimplifyDemandedBits( VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); - unsigned Opc = - IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; - if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) - return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans. + if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent || + TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) != + InBits) { + unsigned Opc = + IsVecInReg ? 
ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; + if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); + } } - APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zext(InElts); - - // Since some of the sign extended bits are demanded, we know that the sign - // bit is demanded. - InDemandedBits.setBit(InBits - 1); - if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; @@ -3344,8 +3358,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Try to transform the select condition based on the current demanded // elements. - APInt UndefSel, UndefZero; - if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO, + APInt UndefSel, ZeroSel; + if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO, Depth + 1)) return true; @@ -3368,7 +3382,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // select value element. APInt DemandedSel = DemandedElts & ~KnownZero; if (DemandedSel != DemandedElts) - if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO, + if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO, Depth + 1)) return true; diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index d4840d117110..232e5e2bb886 100644 --- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -15,9 +15,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/ShadowStackGCLowering.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -50,7 +52,7 @@ using namespace llvm; namespace { -class ShadowStackGCLowering : public FunctionPass { +class ShadowStackGCLoweringImpl { /// RootChain - This is the global linked-list that contains the chain of GC /// roots. GlobalVariable *Head = nullptr; @@ -64,13 +66,10 @@ class ShadowStackGCLowering : public FunctionPass { std::vector<std::pair<CallInst *, AllocaInst *>> Roots; public: - static char ID; - - ShadowStackGCLowering(); + ShadowStackGCLoweringImpl() = default; - bool doInitialization(Module &M) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; + bool doInitialization(Module &M); + bool runOnFunction(Function &F, DomTreeUpdater *DTU); private: bool IsNullValue(Value *V); @@ -86,8 +85,51 @@ private: const char *Name); }; +class ShadowStackGCLowering : public FunctionPass { + ShadowStackGCLoweringImpl Impl; + +public: + static char ID; + + ShadowStackGCLowering(); + + bool doInitialization(Module &M) override { return Impl.doInitialization(M); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + } + bool runOnFunction(Function &F) override { + std::optional<DomTreeUpdater> DTU; + if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); + return Impl.runOnFunction(F, DTU ? 
&*DTU : nullptr); + } +}; + } // end anonymous namespace +PreservedAnalyses ShadowStackGCLoweringPass::run(Module &M, + ModuleAnalysisManager &MAM) { + auto &Map = MAM.getResult<CollectorMetadataAnalysis>(M); + if (Map.StrategyMap.contains("shadow-stack")) + return PreservedAnalyses::all(); + + ShadowStackGCLoweringImpl Impl; + bool Changed = Impl.doInitialization(M); + for (auto &F : M) { + auto &FAM = + MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + Changed |= Impl.runOnFunction(F, DT ? &DTU : nullptr); + } + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} + char ShadowStackGCLowering::ID = 0; char &llvm::ShadowStackGCLoweringID = ShadowStackGCLowering::ID; @@ -104,7 +146,7 @@ ShadowStackGCLowering::ShadowStackGCLowering() : FunctionPass(ID) { initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); } -Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { +Constant *ShadowStackGCLoweringImpl::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. Type *VoidPtr = PointerType::getUnqual(F.getContext()); @@ -158,7 +200,7 @@ Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices); } -Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { +Type *ShadowStackGCLoweringImpl::GetConcreteStackEntryType(Function &F) { // doInitialization creates the generic version of this type. std::vector<Type *> EltTys; EltTys.push_back(StackEntryTy); @@ -170,7 +212,7 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { /// doInitialization - If this module uses the GC intrinsics, find them now. If /// not, exit fast. -bool ShadowStackGCLowering::doInitialization(Module &M) { +bool ShadowStackGCLoweringImpl::doInitialization(Module &M) { bool Active = false; for (Function &F : M) { if (F.hasGC() && F.getGC() == "shadow-stack") { @@ -224,13 +266,13 @@ bool ShadowStackGCLowering::doInitialization(Module &M) { return true; } -bool ShadowStackGCLowering::IsNullValue(Value *V) { +bool ShadowStackGCLoweringImpl::IsNullValue(Value *V) { if (Constant *C = dyn_cast<Constant>(V)) return C->isNullValue(); return false; } -void ShadowStackGCLowering::CollectRoots(Function &F) { +void ShadowStackGCLoweringImpl::CollectRoots(Function &F) { // FIXME: Account for original alignment. Could fragment the root array. // Approach 1: Null initialize empty slots at runtime. Yuck. // Approach 2: Emit a map of the array instead of just a count. 
@@ -258,11 +300,10 @@ void ShadowStackGCLowering::CollectRoots(Function &F) { Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); } -GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Type *Ty, - Value *BasePtr, int Idx, - int Idx2, - const char *Name) { +GetElementPtrInst * +ShadowStackGCLoweringImpl::CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Type *Ty, Value *BasePtr, int Idx, + int Idx2, const char *Name) { Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx), ConstantInt::get(Type::getInt32Ty(Context), Idx2)}; @@ -273,9 +314,11 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, return dyn_cast<GetElementPtrInst>(Val); } -GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, - IRBuilder<> &B, Type *Ty, Value *BasePtr, - int Idx, const char *Name) { +GetElementPtrInst *ShadowStackGCLoweringImpl::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, + Type *Ty, + Value *BasePtr, int Idx, + const char *Name) { Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), ConstantInt::get(Type::getInt32Ty(Context), Idx)}; Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); @@ -285,12 +328,9 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, return dyn_cast<GetElementPtrInst>(Val); } -void ShadowStackGCLowering::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved<DominatorTreeWrapperPass>(); -} - /// runOnFunction - Insert code to maintain the shadow stack. -bool ShadowStackGCLowering::runOnFunction(Function &F) { +bool ShadowStackGCLoweringImpl::runOnFunction(Function &F, + DomTreeUpdater *DTU) { // Quick exit for functions that do not use the shadow stack GC. if (!F.hasGC() || F.getGC() != "shadow-stack") return false; @@ -305,10 +345,6 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { if (Roots.empty()) return false; - std::optional<DomTreeUpdater> DTU; - if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) - DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); - // Build the constant map and figure the type of the shadow stack entry. Value *FrameMap = GetFrameMap(F); Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); @@ -359,8 +395,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... - EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true, - DTU ? &*DTU : nullptr); + EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true, DTU); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 48dc7cb232e3..fca822a485ca 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -64,6 +64,90 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", static cl::opt<bool> DisableCheckNoReturn("disable-check-noreturn-call", cl::init(false), cl::Hidden); +/// InsertStackProtectors - Insert code into the prologue and epilogue of the +/// function. +/// +/// - The prologue code loads and stores the stack guard onto the stack. +/// - The epilogue checks the value stored in the prologue against the original +/// value. It calls __stack_chk_fail if they differ. 
+static bool InsertStackProtectors(const TargetMachine *TM, Function *F, + DomTreeUpdater *DTU, bool &HasPrologue, + bool &HasIRCheck); + +/// CreateFailBB - Create a basic block to jump to when the stack protector +/// check fails. +static BasicBlock *CreateFailBB(Function *F, const Triple &Trip); + +bool SSPLayoutInfo::shouldEmitSDCheck(const BasicBlock &BB) const { + return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator()); +} + +void SSPLayoutInfo::copyToMachineFrameInfo(MachineFrameInfo &MFI) const { + if (Layout.empty()) + return; + + for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + const AllocaInst *AI = MFI.getObjectAllocation(I); + if (!AI) + continue; + + SSPLayoutMap::const_iterator LI = Layout.find(AI); + if (LI == Layout.end()) + continue; + + MFI.setObjectSSPLayout(I, LI->second); + } +} + +SSPLayoutInfo SSPLayoutAnalysis::run(Function &F, + FunctionAnalysisManager &FAM) { + + SSPLayoutInfo Info; + Info.RequireStackProtector = + SSPLayoutAnalysis::requiresStackProtector(&F, &Info.Layout); + Info.SSPBufferSize = F.getFnAttributeAsParsedInteger( + "stack-protector-buffer-size", SSPLayoutInfo::DefaultSSPBufferSize); + return Info; +} + +AnalysisKey SSPLayoutAnalysis::Key; + +PreservedAnalyses StackProtectorPass::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &Info = FAM.getResult<SSPLayoutAnalysis>(F); + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + + if (!Info.RequireStackProtector) + return PreservedAnalyses::all(); + + // TODO(etienneb): Functions with funclets are not correctly supported now. + // Do nothing if this is funclet-based personality. + if (F.hasPersonalityFn()) { + EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); + if (isFuncletEHPersonality(Personality)) + return PreservedAnalyses::all(); + } + + ++NumFunProtected; + bool Changed = InsertStackProtectors(TM, &F, DT ? &DTU : nullptr, + Info.HasPrologue, Info.HasIRCheck); +#ifdef EXPENSIVE_CHECKS + assert((!DT || DT->verify(DominatorTree::VerificationLevel::Full)) && + "Failed to maintain validity of domtree!"); +#endif + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<SSPLayoutAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} + char StackProtector::ID = 0; StackProtector::StackProtector() : FunctionPass(ID) { @@ -90,14 +174,12 @@ bool StackProtector::runOnFunction(Function &Fn) { if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) DTU.emplace(DTWP->getDomTree(), DomTreeUpdater::UpdateStrategy::Lazy); TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); - Trip = TM->getTargetTriple(); - TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); - HasPrologue = false; - HasIRCheck = false; - - SSPBufferSize = Fn.getFnAttributeAsParsedInteger( - "stack-protector-buffer-size", DefaultSSPBufferSize); - if (!requiresStackProtector(F, &Layout)) + LayoutInfo.HasPrologue = false; + LayoutInfo.HasIRCheck = false; + + LayoutInfo.SSPBufferSize = Fn.getFnAttributeAsParsedInteger( + "stack-protector-buffer-size", SSPLayoutInfo::DefaultSSPBufferSize); + if (!requiresStackProtector(F, &LayoutInfo.Layout)) return false; // TODO(etienneb): Functions with funclets are not correctly supported now. 
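[Editor's note, not part of the imported diff] The doc comment above summarizes what InsertStackProtectors emits: a prologue that stashes the stack guard in the local frame and an epilogue that re-checks it before returning. Below is a rough source-level analogy of that behavior, with hypothetical names; the actual pass rewrites LLVM IR, obtains the guard through target hooks, and calls __stack_chk_fail on mismatch.

#include <cstdlib>
#include <cstring>

namespace {

unsigned long StackGuardValue = 0x595e9fbd94fda766UL; // stand-in guard source

[[noreturn]] void reportStackSmash() { // stand-in for __stack_chk_fail
  std::abort();
}

int protectedFunction(const char *Src) {
  // Prologue: load the guard and store a copy into the local frame.
  unsigned long Guard = StackGuardValue;

  char Buffer[64]; // the stack object that made protection necessary
  std::strncpy(Buffer, Src, sizeof(Buffer) - 1);
  Buffer[sizeof(Buffer) - 1] = '\0';
  int Result = Buffer[0];

  // Epilogue: compare the frame copy against the original guard; an overflow
  // that ran past Buffer would have clobbered the saved copy.
  if (Guard != StackGuardValue)
    reportStackSmash();
  return Result;
}

} // namespace

int main() { return protectedFunction("hello") == 'h' ? 0 : 1; }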
@@ -109,7 +191,9 @@ bool StackProtector::runOnFunction(Function &Fn) { } ++NumFunProtected; - bool Changed = InsertStackProtectors(); + bool Changed = + InsertStackProtectors(TM, F, DTU ? &*DTU : nullptr, + LayoutInfo.HasPrologue, LayoutInfo.HasIRCheck); #ifdef EXPENSIVE_CHECKS assert((!DTU || DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full)) && @@ -284,7 +368,8 @@ static const CallInst *findStackProtectorIntrinsic(Function &F) { /// functions with aggregates that contain any buffer regardless of type and /// size, and functions that contain stack-based variables that have had their /// address taken. -bool StackProtector::requiresStackProtector(Function *F, SSPLayoutMap *Layout) { +bool SSPLayoutAnalysis::requiresStackProtector(Function *F, + SSPLayoutMap *Layout) { Module *M = F->getParent(); bool Strong = false; bool NeedsProtector = false; @@ -295,7 +380,7 @@ bool StackProtector::requiresStackProtector(Function *F, SSPLayoutMap *Layout) { SmallPtrSet<const PHINode *, 16> VisitedPHIs; unsigned SSPBufferSize = F->getFnAttributeAsParsedInteger( - "stack-protector-buffer-size", DefaultSSPBufferSize); + "stack-protector-buffer-size", SSPLayoutInfo::DefaultSSPBufferSize); if (F->hasFnAttribute(Attribute::SafeStack)) return false; @@ -460,13 +545,12 @@ static bool CreatePrologue(Function *F, Module *M, Instruction *CheckLoc, return SupportsSelectionDAGSP; } -/// InsertStackProtectors - Insert code into the prologue and epilogue of the -/// function. -/// -/// - The prologue code loads and stores the stack guard onto the stack. -/// - The epilogue checks the value stored in the prologue against the original -/// value. It calls __stack_chk_fail if they differ. -bool StackProtector::InsertStackProtectors() { +bool InsertStackProtectors(const TargetMachine *TM, Function *F, + DomTreeUpdater *DTU, bool &HasPrologue, + bool &HasIRCheck) { + auto *M = F->getParent(); + auto *TLI = TM->getSubtargetImpl(*F)->getTargetLowering(); + // If the target wants to XOR the frame pointer into the guard value, it's // impossible to emit the check in IR, so the target *must* support stack // protection in SDAG. @@ -574,7 +658,7 @@ bool StackProtector::InsertStackProtectors() { // merge pass will merge together all of the various BB into one including // fail BB generated by the stack protector pseudo instruction. if (!FailBB) - FailBB = CreateFailBB(); + FailBB = CreateFailBB(F, TM->getTargetTriple()); IRBuilder<> B(CheckLoc); Value *Guard = getStackGuard(TLI, M, B); @@ -589,8 +673,7 @@ bool StackProtector::InsertStackProtectors() { SuccessProb.getNumerator()); SplitBlockAndInsertIfThen(Cmp, CheckLoc, - /*Unreachable=*/false, Weights, - DTU ? &*DTU : nullptr, + /*Unreachable=*/false, Weights, DTU, /*LI=*/nullptr, /*ThenBlock=*/FailBB); auto *BI = cast<BranchInst>(Cmp->getParent()->getTerminator()); @@ -608,9 +691,8 @@ bool StackProtector::InsertStackProtectors() { return HasPrologue; } -/// CreateFailBB - Create a basic block to jump to when the stack protector -/// check fails. 
-BasicBlock *StackProtector::CreateFailBB() { +BasicBlock *CreateFailBB(Function *F, const Triple &Trip) { + auto *M = F->getParent(); LLVMContext &Context = F->getContext(); BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); IRBuilder<> B(FailBB); @@ -633,27 +715,3 @@ BasicBlock *StackProtector::CreateFailBB() { B.CreateUnreachable(); return FailBB; } - -bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const { - return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator()); -} - -void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const { - if (Layout.empty()) - return; - - for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { - if (MFI.isDeadObjectIndex(I)) - continue; - - const AllocaInst *AI = MFI.getObjectAllocation(I); - if (!AI) - continue; - - SSPLayoutMap::const_iterator LI = Layout.find(AI); - if (LI == Layout.end()) - continue; - - MFI.setObjectSSPLayout(I, LI->second); - } -} diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 7982d80353bd..8922fa589813 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -494,3 +494,84 @@ void SwitchCG::sortAndRangeify(CaseClusterVector &Clusters) { } Clusters.resize(DstIndex); } + +unsigned SwitchCG::SwitchLowering::caseClusterRank(const CaseCluster &CC, + CaseClusterIt First, + CaseClusterIt Last) { + return std::count_if(First, Last + 1, [&](const CaseCluster &X) { + if (X.Prob != CC.Prob) + return X.Prob > CC.Prob; + + // Ties are broken by comparing the case value. + return X.Low->getValue().slt(CC.Low->getValue()); + }); +} + +llvm::SwitchCG::SwitchLowering::SplitWorkItemInfo +SwitchCG::SwitchLowering::computeSplitWorkItemInfo( + const SwitchWorkListItem &W) { + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; + auto RightProb = FirstRight->Prob + W.DefaultProb / 2; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the probability on both + // sides. If LeftProb and RightProb are equal, alternate which side is + // taken to ensure 0-probability nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) + LeftProb += (++LastLeft)->Prob; + else + RightProb += (--FirstRight)->Prob; + I++; + } + + while (true) { + // Our binary search tree differs from a typical BST in that ours can have + // up to three values in each leaf. The pivot selection above doesn't take + // that into account, which means the tree might require more nodes and be + // less efficient. We compensate for this here. + + unsigned NumLeft = LastLeft - W.FirstCluster + 1; + unsigned NumRight = W.LastCluster - FirstRight + 1; + + if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { + // If one side has less than 3 clusters, and the other has more than 3, + // consider taking a cluster from the other side. + + if (NumLeft < NumRight) { + // Consider moving the first cluster on the right to the left side. + CaseCluster &CC = *FirstRight; + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + if (LeftSideRank <= RightSideRank) { + // Moving the cluster to the left does not demote it. 
+ ++LastLeft; + ++FirstRight; + continue; + } + } else { + assert(NumRight < NumLeft); + // Consider moving the last element on the left to the right side. + CaseCluster &CC = *LastLeft; + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + if (RightSideRank <= LeftSideRank) { + // Moving the cluster to the right does not demote it. + --LastLeft; + --FirstRight; + continue; + } + } + } + break; + } + + assert(LastLeft + 1 == FirstRight); + assert(LastLeft >= W.FirstCluster); + assert(FirstRight <= W.LastCluster); + + return SplitWorkItemInfo{LastLeft, FirstRight, LeftProb, RightProb}; +}
\ No newline at end of file diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 2648c16bcd8d..e92e3cd04391 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -520,27 +520,28 @@ RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) { FREXP_PPCF128); } -RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, - MVT VT) { +RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4], + AtomicOrdering Order, + uint64_t MemSize) { unsigned ModeN, ModelN; - switch (VT.SimpleTy) { - case MVT::i8: + switch (MemSize) { + case 1: ModeN = 0; break; - case MVT::i16: + case 2: ModeN = 1; break; - case MVT::i32: + case 4: ModeN = 2; break; - case MVT::i64: + case 8: ModeN = 3; break; - case MVT::i128: + case 16: ModeN = 4; break; default: - return UNKNOWN_LIBCALL; + return RTLIB::UNKNOWN_LIBCALL; } switch (Order) { @@ -561,6 +562,15 @@ RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, return UNKNOWN_LIBCALL; } + return LC[ModeN][ModelN]; +} + +RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, + MVT VT) { + if (!VT.isScalarInteger()) + return UNKNOWN_LIBCALL; + uint64_t MemSize = VT.getScalarSizeInBits() / 8; + #define LCALLS(A, B) \ { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } #define LCALL5(A) \ @@ -568,27 +578,27 @@ RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, switch (Opc) { case ISD::ATOMIC_CMP_SWAP: { const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)}; - return LC[ModeN][ModelN]; + return getOutlineAtomicHelper(LC, Order, MemSize); } case ISD::ATOMIC_SWAP: { const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)}; - return LC[ModeN][ModelN]; + return getOutlineAtomicHelper(LC, Order, MemSize); } case ISD::ATOMIC_LOAD_ADD: { const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)}; - return LC[ModeN][ModelN]; + return getOutlineAtomicHelper(LC, Order, MemSize); } case ISD::ATOMIC_LOAD_OR: { const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)}; - return LC[ModeN][ModelN]; + return getOutlineAtomicHelper(LC, Order, MemSize); } case ISD::ATOMIC_LOAD_CLR: { const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)}; - return LC[ModeN][ModelN]; + return getOutlineAtomicHelper(LC, Order, MemSize); } case ISD::ATOMIC_LOAD_XOR: { const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)}; - return LC[ModeN][ModelN]; + return getOutlineAtomicHelper(LC, Order, MemSize); } default: return UNKNOWN_LIBCALL; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 4003a08a5422..3bbc792f4cbf 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -978,7 +978,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOptLevel::None && !DisableCGP) - addPass(createCodeGenPreparePass()); + addPass(createCodeGenPrepareLegacyPass()); } /// Add common passes that perform LLVM IR to IR transforms in preparation for @@ -1271,7 +1271,7 @@ void TargetPassConfig::addMachinePasses() { // together. Update this check once we have addressed any issues. 
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { - addPass(llvm::createBasicBlockSectionsProfileReaderPass( + addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); addPass(llvm::createBasicBlockPathCloningPass()); } diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 10967123a562..8d76c3bcf672 100644 --- a/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DWARFLinker/DWARFLinker.h" +#include "llvm/DWARFLinker/Classic/DWARFLinker.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" -#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h" -#include "llvm/DWARFLinker/DWARFStreamer.h" +#include "llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h" +#include "llvm/DWARFLinker/Classic/DWARFStreamer.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -39,6 +39,9 @@ namespace llvm { +using namespace dwarf_linker; +using namespace dwarf_linker::classic; + /// Hold the input and output of the debug info size in bytes. struct DebugInfoSize { uint64_t Input; @@ -137,10 +140,6 @@ static bool isTypeTag(uint16_t Tag) { return false; } -AddressesMap::~AddressesMap() = default; - -DwarfEmitter::~DwarfEmitter() = default; - bool DWARFLinker::DIECloner::getDIENames(const DWARFDie &Die, AttributesInfo &Info, OffsetsStringPool &StringPool, @@ -195,7 +194,7 @@ static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) { /// DW_TAG_module blocks. static void analyzeImportedModule( const DWARFDie &DIE, CompileUnit &CU, - swiftInterfacesMap *ParseableSwiftInterfaces, + DWARFLinkerBase::SwiftInterfacesMapTy *ParseableSwiftInterfaces, std::function<void(const Twine &, const DWARFDie &)> ReportWarning) { if (CU.getLanguage() != dwarf::DW_LANG_Swift) return; @@ -307,7 +306,8 @@ static void updateChildPruning(const DWARFDie &Die, CompileUnit &CU, static void analyzeContextInfo( const DWARFDie &DIE, unsigned ParentIdx, CompileUnit &CU, DeclContext *CurrentDeclContext, DeclContextTree &Contexts, - uint64_t ModulesEndOffset, swiftInterfacesMap *ParseableSwiftInterfaces, + uint64_t ModulesEndOffset, + DWARFLinkerBase::SwiftInterfacesMapTy *ParseableSwiftInterfaces, std::function<void(const Twine &, const DWARFDie &)> ReportWarning) { // LIFO work list. std::vector<ContextWorklistItem> Worklist; @@ -1357,9 +1357,9 @@ unsigned DWARFLinker::DIECloner::cloneAddressAttribute( // independently by the linker). // - If address relocated in an inline_subprogram that happens at the // beginning of its inlining function. - // To avoid above cases and to not apply relocation twice (in applyValidRelocs - // and here), read address attribute from InputDIE and apply Info.PCOffset - // here. + // To avoid above cases and to not apply relocation twice (in + // applyValidRelocs and here), read address attribute from InputDIE and apply + // Info.PCOffset here. std::optional<DWARFFormValue> AddrAttribute = InputDIE.find(AttrSpec.Attr); if (!AddrAttribute) @@ -1411,7 +1411,7 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute( // need to remove the attribute. 
if (AttrSpec.Attr == dwarf::DW_AT_macro_info) { if (std::optional<uint64_t> Offset = Val.getAsSectionOffset()) { - const DWARFDebugMacro *Macro = File.Dwarf->getDebugMacinfo(); + const llvm::DWARFDebugMacro *Macro = File.Dwarf->getDebugMacinfo(); if (Macro == nullptr || !Macro->hasEntryForOffset(*Offset)) return 0; } @@ -1419,7 +1419,7 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute( if (AttrSpec.Attr == dwarf::DW_AT_macros) { if (std::optional<uint64_t> Offset = Val.getAsSectionOffset()) { - const DWARFDebugMacro *Macro = File.Dwarf->getDebugMacro(); + const llvm::DWARFDebugMacro *Macro = File.Dwarf->getDebugMacro(); if (Macro == nullptr || !Macro->hasEntryForOffset(*Offset)) return 0; } @@ -2040,8 +2040,7 @@ static void patchAddrBase(DIE &Die, DIEInteger Offset) { } void DWARFLinker::DIECloner::emitDebugAddrSection( - CompileUnit &Unit, - const uint16_t DwarfVersion) const { + CompileUnit &Unit, const uint16_t DwarfVersion) const { if (LLVM_UNLIKELY(Linker.Options.Update)) return; @@ -2407,8 +2406,9 @@ static uint64_t getDwoId(const DWARFDie &CUDie) { return 0; } -static std::string remapPath(StringRef Path, - const objectPrefixMap &ObjectPrefixMap) { +static std::string +remapPath(StringRef Path, + const DWARFLinkerBase::ObjectPrefixMapTy &ObjectPrefixMap) { if (ObjectPrefixMap.empty()) return Path.str(); @@ -2419,8 +2419,9 @@ static std::string remapPath(StringRef Path, return p.str().str(); } -static std::string getPCMFile(const DWARFDie &CUDie, - objectPrefixMap *ObjectPrefixMap) { +static std::string +getPCMFile(const DWARFDie &CUDie, + const DWARFLinkerBase::ObjectPrefixMapTy *ObjectPrefixMap) { std::string PCMFile = dwarf::toString( CUDie.find({dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), ""); @@ -2477,8 +2478,8 @@ std::pair<bool, bool> DWARFLinker::isClangModuleRef(const DWARFDie &CUDie, bool DWARFLinker::registerModuleReference(const DWARFDie &CUDie, LinkContext &Context, - objFileLoader Loader, - CompileUnitHandler OnCUDieLoaded, + ObjFileLoaderTy Loader, + CompileUnitHandlerTy OnCUDieLoaded, unsigned Indent) { std::string PCMFile = getPCMFile(CUDie, Options.ObjectPrefixMap); std::pair<bool, bool> IsClangModuleRef = @@ -2505,11 +2506,9 @@ bool DWARFLinker::registerModuleReference(const DWARFDie &CUDie, return true; } -Error DWARFLinker::loadClangModule(objFileLoader Loader, const DWARFDie &CUDie, - const std::string &PCMFile, - LinkContext &Context, - CompileUnitHandler OnCUDieLoaded, - unsigned Indent) { +Error DWARFLinker::loadClangModule( + ObjFileLoaderTy Loader, const DWARFDie &CUDie, const std::string &PCMFile, + LinkContext &Context, CompileUnitHandlerTy OnCUDieLoaded, unsigned Indent) { uint64_t DwoId = getDwoId(CUDie); std::string ModuleName = dwarf::toString(CUDie.find(dwarf::DW_AT_name), ""); @@ -2673,8 +2672,8 @@ void DWARFLinker::copyInvariantDebugSection(DWARFContext &Dwarf) { Dwarf.getDWARFObj().getLoclistsSection().Data, "debug_loclists"); } -void DWARFLinker::addObjectFile(DWARFFile &File, objFileLoader Loader, - CompileUnitHandler OnCUDieLoaded) { +void DWARFLinker::addObjectFile(DWARFFile &File, ObjFileLoaderTy Loader, + CompileUnitHandlerTy OnCUDieLoaded) { ObjectContexts.emplace_back(LinkContext(File)); if (ObjectContexts.back().File.Dwarf) { @@ -2713,12 +2712,8 @@ Error DWARFLinker::link() { DeclContextTree ODRContexts; for (LinkContext &OptContext : ObjectContexts) { - if (Options.Verbose) { - if (DwarfLinkerClientID == DwarfLinkerClient::Dsymutil) - outs() << "DEBUG MAP OBJECT: " << OptContext.File.FileName << "\n"; - else - outs() << 
"OBJECT FILE: " << OptContext.File.FileName << "\n"; - } + if (Options.Verbose) + outs() << "DEBUG MAP OBJECT: " << OptContext.File.FileName << "\n"; if (!OptContext.File.Dwarf) continue; @@ -3039,7 +3034,6 @@ Error DWARFLinker::cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit, void DWARFLinker::verifyInput(const DWARFFile &File) { assert(File.Dwarf); - std::string Buffer; raw_string_ostream OS(Buffer); DIDumpOptions DumpOpts; diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinkerCompileUnit.cpp index 06559bc38c86..1eb3a70a5513 100644 --- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinkerCompileUnit.cpp @@ -6,15 +6,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +#include "llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h" +#include "llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/Support/FormatVariadic.h" namespace llvm { +using namespace dwarf_linker; +using namespace dwarf_linker::classic; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void CompileUnit::DIEInfo::dump() { llvm::errs() << "{\n"; diff --git a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp index 015a4f9e8ac6..c9c8dddce9c4 100644 --- a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinkerDeclContext.cpp @@ -6,14 +6,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h" -#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +#include "llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h" +#include "llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" namespace llvm { +using namespace dwarf_linker; +using namespace dwarf_linker::classic; + /// Set the last DIE/CU a context was seen in and, possibly invalidate the /// context if it is ambiguous. 
/// diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp index cd649c328ed9..020bbb06449d 100644 --- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFStreamer.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DWARFLinker/DWARFStreamer.h" +#include "llvm/DWARFLinker/Classic/DWARFStreamer.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" -#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +#include "llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" #include "llvm/MC/MCAsmBackend.h" @@ -26,7 +26,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/Triple.h" -namespace llvm { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::classic; Error DwarfStreamer::init(Triple TheTriple, StringRef Swift5ReflectionSegmentName) { @@ -859,21 +861,35 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable( for (auto Include : P.IncludeDirectories) emitLineTableString(P, Include, DebugStrPool, DebugLineStrPool); + bool HasChecksums = P.ContentTypes.HasMD5; + bool HasInlineSources = P.ContentTypes.HasSource; + if (P.FileNames.empty()) { // file_name_entry_format_count (ubyte). MS->emitInt8(0); LineSectionSize += 1; } else { // file_name_entry_format_count (ubyte). - MS->emitInt8(2); + MS->emitInt8(2 + (HasChecksums ? 1 : 0) + (HasInlineSources ? 1 : 0)); LineSectionSize += 1; // file_name_entry_format (sequence of ULEB128 pairs). + auto StrForm = P.FileNames[0].Name.getForm(); LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_path); - LineSectionSize += MS->emitULEB128IntValue(P.FileNames[0].Name.getForm()); + LineSectionSize += MS->emitULEB128IntValue(StrForm); LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_directory_index); LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_data1); + + if (HasChecksums) { + LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_MD5); + LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_data16); + } + + if (HasInlineSources) { + LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_LLVM_source); + LineSectionSize += MS->emitULEB128IntValue(StrForm); + } } // file_names_count (ULEB128). 
@@ -884,6 +900,14 @@ void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable( emitLineTableString(P, File.Name, DebugStrPool, DebugLineStrPool); MS->emitInt8(File.DirIdx); LineSectionSize += 1; + if (HasChecksums) { + MS->emitBinaryData( + StringRef(reinterpret_cast<const char *>(File.Checksum.data()), + File.Checksum.size())); + LineSectionSize += File.Checksum.size(); + } + if (HasInlineSources) + emitLineTableString(P, File.Source, DebugStrPool, DebugLineStrPool); } } @@ -1404,5 +1428,3 @@ void DwarfStreamer::emitMacroTableImpl(const DWARFDebugMacro *MacroTable, } } } - -} // namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.cpp b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp index 5ec25cfe5fd2..3af574c70561 100644 --- a/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.cpp +++ b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp @@ -11,8 +11,9 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/Support/DJB.h" -namespace llvm { -namespace dwarflinker_parallel { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; static uint32_t hashFullyQualifiedName(CompileUnit &InputCU, DWARFDie &InputDIE, int ChildRecurseDepth = 0) { @@ -290,6 +291,3 @@ void AcceleratorRecordsSaver::saveTypeRecord(StringEntry *Name, DIE *OutDIE, Info.TypeEntryBodyPtr = TypeEntry->getValue().load(); OutUnit.getAsTypeUnit()->saveAcceleratorInfo(Info); } - -} // end of namespace dwarflinker_parallel -} // namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.h b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.h index 5e7f4d0c3166..bc3ea8669ece 100644 --- a/llvm/lib/DWARFLinkerParallel/AcceleratorRecordsSaver.h +++ b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_ACCELERATORRECORDSSAVER_H -#define LLVM_LIB_DWARFLINKERPARALLEL_ACCELERATORRECORDSSAVER_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_ACCELERATORRECORDSSAVER_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_ACCELERATORRECORDSSAVER_H #include "DIEAttributeCloner.h" #include "DWARFLinkerCompileUnit.h" @@ -15,7 +15,8 @@ #include "DWARFLinkerTypeUnit.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This class helps to store information for accelerator entries. 
/// It prepares accelerator info for the certain DIE and store it inside @@ -64,7 +65,8 @@ protected: CompileUnit::OutputUnitVariantPtr OutUnit; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_ACCELERATORRECORDSSAVER_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_ACCELERATORRECORDSSAVER_H diff --git a/llvm/lib/DWARFLinkerParallel/ArrayList.h b/llvm/lib/DWARFLinker/Parallel/ArrayList.h index def83f91bc6f..c48f828609be 100644 --- a/llvm/lib/DWARFLinkerParallel/ArrayList.h +++ b/llvm/lib/DWARFLinker/Parallel/ArrayList.h @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_ARRAYLIST_H -#define LLVM_LIB_DWARFLINKERPARALLEL_ARRAYLIST_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_ARRAYLIST_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_ARRAYLIST_H #include "llvm/Support/PerThreadBumpPtrAllocator.h" #include <atomic> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This class is a simple list of T structures. It keeps elements as /// pre-allocated groups to save memory for each element's next pointer. @@ -21,7 +22,7 @@ namespace dwarflinker_parallel { /// Method add() can be called asynchronously. template <typename T, size_t ItemsGroupSize = 512> class ArrayList { public: - ArrayList(parallel::PerThreadBumpPtrAllocator *Allocator) + ArrayList(llvm::parallel::PerThreadBumpPtrAllocator *Allocator) : Allocator(Allocator) {} /// Add specified \p Item to the list. @@ -156,10 +157,11 @@ protected: std::atomic<ItemsGroup *> GroupsHead = nullptr; std::atomic<ItemsGroup *> LastGroup = nullptr; - parallel::PerThreadBumpPtrAllocator *Allocator = nullptr; + llvm::parallel::PerThreadBumpPtrAllocator *Allocator = nullptr; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_ARRAYLIST_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_ARRAYLIST_H diff --git a/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.cpp b/llvm/lib/DWARFLinker/Parallel/DIEAttributeCloner.cpp index 81fc57f7cabb..07ebd55e2c46 100644 --- a/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DIEAttributeCloner.cpp @@ -9,8 +9,9 @@ #include "DIEAttributeCloner.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" -namespace llvm { -namespace dwarflinker_parallel { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; void DIEAttributeCloner::clone() { // Extract and clone every attribute. 
@@ -650,6 +651,3 @@ unsigned DIEAttributeCloner::finalizeAbbreviations(bool HasChildrenToClone) { return AttrOutOffset; } - -} // end of namespace dwarflinker_parallel -} // namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.h b/llvm/lib/DWARFLinker/Parallel/DIEAttributeCloner.h index e18c0a15cefc..6a6bd08570d7 100644 --- a/llvm/lib/DWARFLinkerParallel/DIEAttributeCloner.h +++ b/llvm/lib/DWARFLinker/Parallel/DIEAttributeCloner.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DIEATTRIBUTECLONER_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DIEATTRIBUTECLONER_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DIEATTRIBUTECLONER_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DIEATTRIBUTECLONER_H #include "ArrayList.h" #include "DIEGenerator.h" @@ -16,7 +16,8 @@ #include "DWARFLinkerTypeUnit.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// Information gathered and exchanged between the various /// clone*Attr helpers about the attributes of a particular DIE. @@ -178,7 +179,8 @@ protected: bool Use_DW_FORM_strp = false; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DIEATTRIBUTECLONER_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DIEATTRIBUTECLONER_H diff --git a/llvm/lib/DWARFLinkerParallel/DIEGenerator.h b/llvm/lib/DWARFLinker/Parallel/DIEGenerator.h index 42bf00f55ff1..2341dbaa8c76 100644 --- a/llvm/lib/DWARFLinkerParallel/DIEGenerator.h +++ b/llvm/lib/DWARFLinker/Parallel/DIEGenerator.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DIEGENERATOR_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DIEGENERATOR_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DIEGENERATOR_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DIEGENERATOR_H #include "DWARFLinkerGlobalData.h" #include "DWARFLinkerUnit.h" @@ -15,7 +15,8 @@ #include "llvm/Support/LEB128.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This class is a helper to create output DIE tree. 
class DIEGenerator { @@ -174,7 +175,8 @@ protected: DIE *OutputDIE = nullptr; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DIEGENERATOR_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DIEGENERATOR_H diff --git a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp index 355cfae3a646..115167f0c7dc 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "DWARFEmitterImpl.h" -#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +#include "DWARFLinkerCompileUnit.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCObjectWriter.h" @@ -17,8 +17,9 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/FormattedStream.h" -namespace llvm { -namespace dwarflinker_parallel { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; Error DwarfEmitterImpl::init(Triple TheTriple, StringRef Swift5ReflectionSegmentName) { @@ -276,6 +277,3 @@ void DwarfEmitterImpl::emitAppleTypes( Asm->OutStreamer->emitLabel(SectionBegin); emitAppleAccelTable(Asm.get(), Table, "types", SectionBegin); } - -} // end of namespace dwarflinker_parallel -} // namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.h index d03336c1c11a..89a33fe94191 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFEmitterImpl.h @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFEMITTERIMPL_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFEMITTERIMPL_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DWARFEMITTERIMPL_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DWARFEMITTERIMPL_H #include "DWARFLinkerCompileUnit.h" #include "llvm/BinaryFormat/Swift.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/DWARFLinkerParallel/DWARFLinker.h" +#include "llvm/DWARFLinker/Parallel/DWARFLinker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" @@ -36,7 +36,8 @@ namespace llvm { template <typename DataT> class AccelTable; class MCCodeEmitter; -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { using DebugNamesUnitsOffsets = std::vector<std::variant<MCSymbol *, uint64_t>>; using CompUnitIDToIdx = DenseMap<unsigned, unsigned>; @@ -139,7 +140,8 @@ private: uint64_t DebugInfoSectionSize = 0; }; -} // end namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFEMITTERIMPL_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DWARFEMITTERIMPL_H diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinker.cpp index 269f24b1a13b..ad8d28a64317 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinker.cpp @@ -9,10 +9,14 @@ #include "DWARFLinkerImpl.h" #include "DependencyTracker.h" -std::unique_ptr<llvm::dwarflinker_parallel::DWARFLinker> 
-llvm::dwarflinker_parallel::DWARFLinker::createLinker( - MessageHandlerTy ErrorHandler, MessageHandlerTy WarningHandler, - TranslatorFuncTy StringsTranslator) { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; + +std::unique_ptr<DWARFLinker> +DWARFLinker::createLinker(MessageHandlerTy ErrorHandler, + MessageHandlerTy WarningHandler, + TranslatorFuncTy StringsTranslator) { return std::make_unique<DWARFLinkerImpl>(ErrorHandler, WarningHandler, StringsTranslator); } diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp index 3f0e75690272..ffcf9f365aec 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp @@ -21,7 +21,8 @@ #include <utility> using namespace llvm; -using namespace llvm::dwarflinker_parallel; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; CompileUnit::CompileUnit(LinkingGlobalData &GlobalData, unsigned ID, StringRef ClangModuleName, DWARFFile &File, @@ -1870,7 +1871,7 @@ void CompileUnit::verifyDependencies() { Dependencies.get()->verifyKeepChain(); } -ArrayRef<dwarf::Attribute> llvm::dwarflinker_parallel::getODRAttributes() { +ArrayRef<dwarf::Attribute> dwarf_linker::parallel::getODRAttributes() { static dwarf::Attribute ODRAttributes[] = { dwarf::DW_AT_type, dwarf::DW_AT_specification, dwarf::DW_AT_abstract_origin, dwarf::DW_AT_import}; diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.h index 28fcc34d867d..abd978e7c0e4 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.h @@ -6,15 +6,16 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERCOMPILEUNIT_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERCOMPILEUNIT_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERCOMPILEUNIT_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERCOMPILEUNIT_H #include "DWARFLinkerUnit.h" -#include "llvm/DWARFLinkerParallel/DWARFFile.h" +#include "llvm/DWARFLinker/DWARFFile.h" #include <optional> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { using OffsetToUnitTy = function_ref<CompileUnit *(uint64_t Offset)>; @@ -730,7 +731,8 @@ private: /// infinite recursion. 
ArrayRef<dwarf::Attribute> getODRAttributes(); -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERCOMPILEUNIT_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERCOMPILEUNIT_H diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerGlobalData.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerGlobalData.h index 31724770093d..b641343ac808 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerGlobalData.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerGlobalData.h @@ -6,19 +6,20 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERGLOBALDATA_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERGLOBALDATA_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERGLOBALDATA_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERGLOBALDATA_H #include "TypePool.h" -#include "llvm/DWARFLinkerParallel/DWARFLinker.h" -#include "llvm/DWARFLinkerParallel/StringPool.h" +#include "llvm/DWARFLinker/Parallel/DWARFLinker.h" +#include "llvm/DWARFLinker/StringPool.h" #include "llvm/Support/PerThreadBumpPtrAllocator.h" namespace llvm { class DWARFDie; -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { using TranslatorFuncTy = std::function<StringRef(StringRef)>; using MessageHandlerTy = std::function<void( @@ -89,7 +90,9 @@ class LinkingGlobalData { public: /// Returns global per-thread allocator. - parallel::PerThreadBumpPtrAllocator &getAllocator() { return Allocator; } + llvm::parallel::PerThreadBumpPtrAllocator &getAllocator() { + return Allocator; + } /// Returns global string pool. StringPool &getStringPool() { return Strings; } @@ -145,7 +148,7 @@ public: } protected: - parallel::PerThreadBumpPtrAllocator Allocator; + llvm::parallel::PerThreadBumpPtrAllocator Allocator; StringPool Strings; TranslatorFuncTy Translator; DWARFLinkerOptions Options; @@ -153,7 +156,8 @@ protected: MessageHandlerTy ErrorHandler; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERGLOBALDATA_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERGLOBALDATA_H diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp index c49b9ef0cdf9..bb59cbfdb347 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp @@ -15,8 +15,9 @@ #include "llvm/Support/Parallel.h" #include "llvm/Support/ThreadPool.h" -namespace llvm { -namespace dwarflinker_parallel { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; DWARFLinkerImpl::DWARFLinkerImpl(MessageHandlerTy ErrorHandler, MessageHandlerTy WarningHandler, @@ -182,7 +183,7 @@ Error DWARFLinkerImpl::link() { CommonSections.setOutputFormat(GlobalFormat, GlobalEndianness); if (!GlobalData.Options.NoODR && Language.has_value()) { - parallel::TaskGroup TGroup; + llvm::parallel::TaskGroup TGroup; TGroup.spawn([&]() { ArtificialTypeUnit = std::make_unique<TypeUnit>( GlobalData, UniqueUnitID++, Language, GlobalFormat, GlobalEndianness); @@ -191,9 +192,10 @@ Error DWARFLinkerImpl::link() { // Set parallel options. 
if (GlobalData.getOptions().Threads == 0) - parallel::strategy = optimal_concurrency(OverallNumberOfCU); + llvm::parallel::strategy = optimal_concurrency(OverallNumberOfCU); else - parallel::strategy = hardware_concurrency(GlobalData.getOptions().Threads); + llvm::parallel::strategy = + hardware_concurrency(GlobalData.getOptions().Threads); // Link object files. if (GlobalData.getOptions().Threads == 1) { @@ -205,7 +207,7 @@ Error DWARFLinkerImpl::link() { Context->InputDWARFFile.unload(); } } else { - ThreadPool Pool(parallel::strategy); + ThreadPool Pool(llvm::parallel::strategy); for (std::unique_ptr<LinkContext> &Context : ObjectContexts) Pool.async([&]() { // Link object file. @@ -486,108 +488,104 @@ Error DWARFLinkerImpl::LinkContext::link(TypeUnit *ArtificialTypeUnit) { return Error::success(); } - OriginalDebugInfoSize = getInputDebugInfoSize(); - - // Create CompileUnit structures to keep information about source - // DWARFUnit`s, load line tables. - for (const auto &OrigCU : InputDWARFFile.Dwarf->compile_units()) { - // Load only unit DIE at this stage. - auto CUDie = OrigCU->getUnitDIE(); - std::string PCMFile = - getPCMFile(CUDie, GlobalData.getOptions().ObjectPrefixMap); - - // The !isClangModuleRef condition effectively skips over fully resolved - // skeleton units. - if (!CUDie || GlobalData.getOptions().UpdateIndexTablesOnly || - !isClangModuleRef(CUDie, PCMFile, 0, true).first) { - CompileUnits.emplace_back(std::make_unique<CompileUnit>( - GlobalData, *OrigCU, UniqueUnitID.fetch_add(1), "", InputDWARFFile, - getUnitForOffset, OrigCU->getFormParams(), getEndianness())); - - // Preload line table, as it can't be loaded asynchronously. - CompileUnits.back()->loadLineTable(); - } - }; + OriginalDebugInfoSize = getInputDebugInfoSize(); + + // Create CompileUnit structures to keep information about source + // DWARFUnit`s, load line tables. + for (const auto &OrigCU : InputDWARFFile.Dwarf->compile_units()) { + // Load only unit DIE at this stage. + auto CUDie = OrigCU->getUnitDIE(); + std::string PCMFile = + getPCMFile(CUDie, GlobalData.getOptions().ObjectPrefixMap); + + // The !isClangModuleRef condition effectively skips over fully resolved + // skeleton units. + if (!CUDie || GlobalData.getOptions().UpdateIndexTablesOnly || + !isClangModuleRef(CUDie, PCMFile, 0, true).first) { + CompileUnits.emplace_back(std::make_unique<CompileUnit>( + GlobalData, *OrigCU, UniqueUnitID.fetch_add(1), "", InputDWARFFile, + getUnitForOffset, OrigCU->getFormParams(), getEndianness())); + + // Preload line table, as it can't be loaded asynchronously. + CompileUnits.back()->loadLineTable(); + } + }; - HasNewInterconnectedCUs = false; + HasNewInterconnectedCUs = false; - // Link self-sufficient compile units and discover inter-connected compile - // units. - parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - linkSingleCompileUnit(*CU, ArtificialTypeUnit); - }); + // Link self-sufficient compile units and discover inter-connected compile + // units. + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + linkSingleCompileUnit(*CU, ArtificialTypeUnit); + }); + + // Link all inter-connected units. + if (HasNewInterconnectedCUs) { + InterCUProcessingStarted = true; - // Link all inter-connected units. - if (HasNewInterconnectedCUs) { - InterCUProcessingStarted = true; - - if (Error Err = finiteLoop([&]() -> Expected<bool> { - HasNewInterconnectedCUs = false; - - // Load inter-connected units. 
- parallelForEach( - CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - if (CU->isInterconnectedCU()) { - CU->maybeResetToLoadedStage(); - linkSingleCompileUnit(*CU, ArtificialTypeUnit, - CompileUnit::Stage::Loaded); - } - }); - - // Do liveness analysis for inter-connected units. - parallelForEach(CompileUnits, - [&](std::unique_ptr<CompileUnit> &CU) { - linkSingleCompileUnit( - *CU, ArtificialTypeUnit, + if (Error Err = finiteLoop([&]() -> Expected<bool> { + HasNewInterconnectedCUs = false; + + // Load inter-connected units. + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + if (CU->isInterconnectedCU()) { + CU->maybeResetToLoadedStage(); + linkSingleCompileUnit(*CU, ArtificialTypeUnit, + CompileUnit::Stage::Loaded); + } + }); + + // Do liveness analysis for inter-connected units. + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + linkSingleCompileUnit(*CU, ArtificialTypeUnit, CompileUnit::Stage::LivenessAnalysisDone); - }); - - return HasNewInterconnectedCUs.load(); - })) - return Err; - - // Update dependencies. - if (Error Err = finiteLoop([&]() -> Expected<bool> { - HasNewGlobalDependency = false; - parallelForEach( - CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - linkSingleCompileUnit( - *CU, ArtificialTypeUnit, - CompileUnit::Stage::UpdateDependenciesCompleteness); - }); - return HasNewGlobalDependency.load(); - })) - return Err; - parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - if (CU->isInterconnectedCU() && - CU->getStage() == CompileUnit::Stage::LivenessAnalysisDone) - CU->setStage(CompileUnit::Stage::UpdateDependenciesCompleteness); - }); + }); - // Assign type names. - parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - linkSingleCompileUnit(*CU, ArtificialTypeUnit, - CompileUnit::Stage::TypeNamesAssigned); - }); + return HasNewInterconnectedCUs.load(); + })) + return Err; - // Clone inter-connected units. - parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - linkSingleCompileUnit(*CU, ArtificialTypeUnit, - CompileUnit::Stage::Cloned); - }); + // Update dependencies. + if (Error Err = finiteLoop([&]() -> Expected<bool> { + HasNewGlobalDependency = false; + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + linkSingleCompileUnit( + *CU, ArtificialTypeUnit, + CompileUnit::Stage::UpdateDependenciesCompleteness); + }); + return HasNewGlobalDependency.load(); + })) + return Err; + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + if (CU->isInterconnectedCU() && + CU->getStage() == CompileUnit::Stage::LivenessAnalysisDone) + CU->setStage(CompileUnit::Stage::UpdateDependenciesCompleteness); + }); - // Update patches for inter-connected units. - parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - linkSingleCompileUnit(*CU, ArtificialTypeUnit, - CompileUnit::Stage::PatchesUpdated); - }); + // Assign type names. + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + linkSingleCompileUnit(*CU, ArtificialTypeUnit, + CompileUnit::Stage::TypeNamesAssigned); + }); - // Release data. - parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { - linkSingleCompileUnit(*CU, ArtificialTypeUnit, - CompileUnit::Stage::Cleaned); - }); - } + // Clone inter-connected units. + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + linkSingleCompileUnit(*CU, ArtificialTypeUnit, + CompileUnit::Stage::Cloned); + }); + + // Update patches for inter-connected units. 
+ parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + linkSingleCompileUnit(*CU, ArtificialTypeUnit, + CompileUnit::Stage::PatchesUpdated); + }); + + // Release data. + parallelForEach(CompileUnits, [&](std::unique_ptr<CompileUnit> &CU) { + linkSingleCompileUnit(*CU, ArtificialTypeUnit, + CompileUnit::Stage::Cleaned); + }); + } if (GlobalData.getOptions().UpdateIndexTablesOnly) { // Emit Invariant sections. @@ -598,7 +596,7 @@ Error DWARFLinkerImpl::LinkContext::link(TypeUnit *ArtificialTypeUnit) { // Emit .debug_frame section. Error ResultErr = Error::success(); - parallel::TaskGroup TGroup; + llvm::parallel::TaskGroup TGroup; // We use task group here as PerThreadBumpPtrAllocator should be called from // the threads created by ThreadPoolExecutor. TGroup.spawn([&]() { @@ -965,7 +963,7 @@ void DWARFLinkerImpl::printStatistic() { } void DWARFLinkerImpl::assignOffsets() { - parallel::TaskGroup TGroup; + llvm::parallel::TaskGroup TGroup; TGroup.spawn([&]() { assignOffsetsToStrings(); }); TGroup.spawn([&]() { assignOffsetsToSections(); }); } @@ -1134,7 +1132,7 @@ void DWARFLinkerImpl::patchOffsetsAndSizes() { } void DWARFLinkerImpl::emitCommonSectionsAndWriteCompileUnitsToTheOutput() { - parallel::TaskGroup TG; + llvm::parallel::TaskGroup TG; // Create section descriptors ahead if they are not exist at the moment. // SectionDescriptors container is not thread safe. Thus we should be sure @@ -1451,6 +1449,3 @@ void DWARFLinkerImpl::writeCommonSectionsToTheOutput() { OutSection.clearSectionContent(); }); } - -} // end of namespace dwarflinker_parallel -} // namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.h index 60018eea121f..b4331df5e323 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERIMPL_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERIMPL_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERIMPL_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERIMPL_H #include "DWARFEmitterImpl.h" #include "DWARFLinkerCompileUnit.h" @@ -15,11 +15,12 @@ #include "StringEntryToDwarfStringPoolEntryMap.h" #include "llvm/ADT/AddressRanges.h" #include "llvm/CodeGen/AccelTable.h" -#include "llvm/DWARFLinkerParallel/DWARFLinker.h" -#include "llvm/DWARFLinkerParallel/StringPool.h" +#include "llvm/DWARFLinker/Parallel/DWARFLinker.h" +#include "llvm/DWARFLinker/StringPool.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This class links debug info. class DWARFLinkerImpl : public DWARFLinker { @@ -100,7 +101,7 @@ public: } /// Set prepend path for clang modules. 
- void setPrependPath(const std::string &Ppath) override { + void setPrependPath(StringRef Ppath) override { GlobalData.Options.PrependPath = Ppath; } @@ -374,7 +375,8 @@ protected: /// @} }; -} // end namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERIMPL_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERIMPL_H diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerTypeUnit.cpp index 9d5c213085c2..397411895a8e 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerTypeUnit.cpp @@ -12,7 +12,8 @@ #include "llvm/Support/LEB128.h" using namespace llvm; -using namespace llvm::dwarflinker_parallel; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; TypeUnit::TypeUnit(LinkingGlobalData &GlobalData, unsigned ID, std::optional<uint16_t> Language, dwarf::FormParams Format, @@ -43,7 +44,7 @@ void TypeUnit::createDIETree(BumpPtrAllocator &Allocator) { // TaskGroup is created here as internal code has calls to // PerThreadBumpPtrAllocator which should be called from the task group task. - parallel::TaskGroup TG; + llvm::parallel::TaskGroup TG; TG.spawn([&]() { SectionDescriptor &DebugInfoSection = getOrCreateSectionDescriptor(DebugSectionKind::DebugInfo); @@ -134,7 +135,7 @@ void TypeUnit::prepareDataForTreeCreation() { // Type unit data created parallelly. So the order of data is not // deterministic. Order data here if we need deterministic output. - parallel::TaskGroup TG; + llvm::parallel::TaskGroup TG; if (!GlobalData.getOptions().AllowNonDeterministicOutput) { TG.spawn([&]() { diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerTypeUnit.h index 97e620eee0c4..0944de8d1315 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerTypeUnit.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerTypeUnit.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKERPARALLEL_DWARFLINKERTYPEUNIT_H -#define LLVM_DWARFLINKERPARALLEL_DWARFLINKERTYPEUNIT_H +#ifndef LLVM_DWARFLINKER_PARALLEL_DWARFLINKERTYPEUNIT_H +#define LLVM_DWARFLINKER_PARALLEL_DWARFLINKERTYPEUNIT_H #include "DWARFLinkerUnit.h" #include "llvm/CodeGen/DIE.h" @@ -15,7 +15,8 @@ #include "llvm/DebugInfo/DWARF/DWARFUnit.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// Type Unit is used to represent an artificial compilation unit /// which keeps all type information. 
This type information is referenced @@ -132,7 +133,8 @@ private: std::mutex DebugStringIndexMapMutex; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_DWARFLINKERPARALLEL_DWARFLINKERTYPEUNIT_H +#endif // LLVM_DWARFLINKER_PARALLEL_DWARFLINKERTYPEUNIT_H diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.cpp index b1da1900d65e..93def34aa4fd 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.cpp @@ -10,8 +10,9 @@ #include "DWARFEmitterImpl.h" #include "DebugLineSectionEmitter.h" -namespace llvm { -namespace dwarflinker_parallel { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; void DwarfUnit::assignAbbrev(DIEAbbrev &Abbrev) { // Check the set for priors. @@ -245,6 +246,3 @@ void DwarfUnit::emitPubAccelerators() { OutSection.OS.tell() - *TypesLengthOffset); } } - -} // end of namespace dwarflinker_parallel -} // end of namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h index 9640a8ee711e..36c24372e494 100644 --- a/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h @@ -6,20 +6,21 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERUNIT_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERUNIT_H #include "DWARFLinkerGlobalData.h" #include "IndexedValuesMap.h" #include "OutputSections.h" #include "llvm/CodeGen/DIE.h" -#include "llvm/DWARFLinkerParallel/DWARFLinker.h" -#include "llvm/DWARFLinkerParallel/StringPool.h" +#include "llvm/DWARFLinker/Parallel/DWARFLinker.h" +#include "llvm/DWARFLinker/StringPool.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/LEB128.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { class DwarfUnit; using MacroOffset2UnitMapTy = DenseMap<uint64_t, DwarfUnit *>; @@ -215,7 +216,8 @@ inline bool isODRLanguage(uint16_t Language) { return false; } -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DWARFLINKERUNIT_H diff --git a/llvm/lib/DWARFLinkerParallel/DebugLineSectionEmitter.h b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h index fc7f8cbc4a8e..545d04cfbe43 100644 --- a/llvm/lib/DWARFLinkerParallel/DebugLineSectionEmitter.h +++ b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h @@ -6,18 +6,19 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DEBUGLINESECTIONEMITTER_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DEBUGLINESECTIONEMITTER_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DEBUGLINESECTIONEMITTER_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DEBUGLINESECTIONEMITTER_H #include "DWARFEmitterImpl.h" -#include "llvm/DWARFLinkerParallel/AddressesMap.h" -#include "llvm/DWARFLinkerParallel/DWARFLinker.h" +#include "llvm/DWARFLinker/AddressesMap.h" +#include "llvm/DWARFLinker/Parallel/DWARFLinker.h" 
#include "llvm/DebugInfo/DWARF/DWARFObject.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/MC/TargetRegistry.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This class emits specified line table into the .debug_line section. class DebugLineSectionEmitter { @@ -197,7 +198,7 @@ private: Section.emitIntVal(0, 1); } else { // file_name_entry_format_count (ubyte). - Section.emitIntVal(2, 1); + Section.emitIntVal(2 + (P.ContentTypes.HasMD5 ? 1 : 0), 1); // file_name_entry_format (sequence of ULEB128 pairs). encodeULEB128(dwarf::DW_LNCT_path, Section.OS); @@ -205,6 +206,11 @@ private: encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS); encodeULEB128(dwarf::DW_FORM_data1, Section.OS); + + if (P.ContentTypes.HasMD5) { + encodeULEB128(dwarf::DW_LNCT_MD5, Section.OS); + encodeULEB128(dwarf::DW_FORM_data16, Section.OS); + } } // file_names_count (ULEB128). @@ -222,6 +228,12 @@ private: // source file. Section.emitString(File.Name.getForm(), *FileNameStr); Section.emitIntVal(File.DirIdx, 1); + + if (P.ContentTypes.HasMD5) { + Section.emitBinaryData( + StringRef(reinterpret_cast<const char *>(File.Checksum.data()), + File.Checksum.size())); + } } } @@ -378,7 +390,8 @@ private: std::unique_ptr<MCSubtargetInfo> MSTI; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DEBUGLINESECTIONEMITTER_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DEBUGLINESECTIONEMITTER_H diff --git a/llvm/lib/DWARFLinkerParallel/DependencyTracker.cpp b/llvm/lib/DWARFLinker/Parallel/DependencyTracker.cpp index 052eb6cf57d4..04152e7f9f2b 100644 --- a/llvm/lib/DWARFLinkerParallel/DependencyTracker.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DependencyTracker.cpp @@ -9,8 +9,9 @@ #include "DependencyTracker.h" #include "llvm/Support/FormatVariadic.h" -namespace llvm { -namespace dwarflinker_parallel { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; /// A broken link in the keep chain. By recording both the parent and the child /// we can show only broken links for DIEs with multiple children. 
@@ -834,6 +835,3 @@ bool DependencyTracker::isLiveSubprogramEntry(const UnitEntryPairTy &Entry) { Entry.CU->addFunctionRange(*LowPc, *HighPc, *RelocAdjustment); return true; } - -} // end of namespace dwarflinker_parallel -} // namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/DependencyTracker.h b/llvm/lib/DWARFLinker/Parallel/DependencyTracker.h index b0b6ad3a1e8c..4a0d985c8aaa 100644 --- a/llvm/lib/DWARFLinkerParallel/DependencyTracker.h +++ b/llvm/lib/DWARFLinker/Parallel/DependencyTracker.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DEPENDENCYTRACKER_H -#define LLVM_LIB_DWARFLINKERPARALLEL_DEPENDENCYTRACKER_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_DEPENDENCYTRACKER_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_DEPENDENCYTRACKER_H #include "DWARFLinkerCompileUnit.h" #include "llvm/ADT/PointerIntPair.h" @@ -17,7 +17,8 @@ namespace llvm { class DWARFDebugInfoEntry; class DWARFDie; -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This class discovers DIEs dependencies: marks "live" DIEs, marks DIE /// locations (whether DIE should be cloned as regular DIE or it should be put @@ -266,7 +267,8 @@ protected: RootEntriesListTy Dependencies; }; -} // end namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_DEPENDENCYTRACKER_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_DEPENDENCYTRACKER_H diff --git a/llvm/lib/DWARFLinkerParallel/IndexedValuesMap.h b/llvm/lib/DWARFLinker/Parallel/IndexedValuesMap.h index 0dc8de860a42..b592ce37937b 100644 --- a/llvm/lib/DWARFLinkerParallel/IndexedValuesMap.h +++ b/llvm/lib/DWARFLinker/Parallel/IndexedValuesMap.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_INDEXEDVALUESMAP_H -#define LLVM_LIB_DWARFLINKERPARALLEL_INDEXEDVALUESMAP_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_INDEXEDVALUESMAP_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_INDEXEDVALUESMAP_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -15,7 +15,8 @@ #include <utility> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { template <typename T> class IndexedValuesMap { public: @@ -43,7 +44,8 @@ protected: SmallVector<T> Values; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_INDEXEDVALUESMAP_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_INDEXEDVALUESMAP_H diff --git a/llvm/lib/DWARFLinkerParallel/OutputSections.cpp b/llvm/lib/DWARFLinker/Parallel/OutputSections.cpp index 9c3e3ebd220a..cd1205b60f85 100644 --- a/llvm/lib/DWARFLinkerParallel/OutputSections.cpp +++ b/llvm/lib/DWARFLinker/Parallel/OutputSections.cpp @@ -11,22 +11,12 @@ #include "DWARFLinkerTypeUnit.h" #include "llvm/ADT/StringSwitch.h" -namespace llvm { -namespace dwarflinker_parallel { - -static constexpr StringLiteral SectionNames[SectionKindsNum] = { - "debug_info", "debug_line", "debug_frame", "debug_ranges", - "debug_rnglists", "debug_loc", "debug_loclists", "debug_aranges", - "debug_abbrev", "debug_macinfo", "debug_macro", "debug_addr", - "debug_str", "debug_line_str", "debug_str_offsets", "debug_pubnames", - "debug_pubtypes", "debug_names", "apple_names", 
"apple_namespac", - "apple_objc", "apple_types"}; - -const StringLiteral &getSectionName(DebugSectionKind SectionKind) { - return SectionNames[static_cast<uint8_t>(SectionKind)]; -} +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; -std::optional<DebugSectionKind> parseDebugTableName(llvm::StringRef SecName) { +std::optional<DebugSectionKind> +dwarf_linker::parallel::parseDebugTableName(llvm::StringRef SecName) { return llvm::StringSwitch<std::optional<DebugSectionKind>>( SecName.substr(SecName.find_first_not_of("._"))) .Case(getSectionName(DebugSectionKind::DebugInfo), @@ -227,6 +217,10 @@ void SectionDescriptor::emitIntVal(uint64_t Val, unsigned Size) { } } +void SectionDescriptor::emitBinaryData(llvm::StringRef Data) { + OS.write(Data.data(), Data.size()); +} + void SectionDescriptor::apply(uint64_t PatchOffset, dwarf::Form AttrForm, uint64_t Val) { switch (AttrForm) { @@ -527,6 +521,3 @@ void OutputSections::applyPatches( Section.apply(Patch.PatchOffset, dwarf::DW_FORM_sec_offset, FinalValue); }); } - -} // end of namespace dwarflinker_parallel -} // end of namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/OutputSections.h b/llvm/lib/DWARFLinker/Parallel/OutputSections.h index f23b2efb869d..b9df2228920a 100644 --- a/llvm/lib/DWARFLinkerParallel/OutputSections.h +++ b/llvm/lib/DWARFLinker/Parallel/OutputSections.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H -#define LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_OUTPUTSECTIONS_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_OUTPUTSECTIONS_H #include "ArrayList.h" #include "StringEntryToDwarfStringPoolEntryMap.h" @@ -15,7 +15,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" -#include "llvm/DWARFLinkerParallel/StringPool.h" +#include "llvm/DWARFLinker/StringPool.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFObject.h" #include "llvm/Object/ObjectFile.h" @@ -29,7 +29,8 @@ #include <cstdint> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { class TypeUnit; @@ -62,12 +63,22 @@ enum class DebugSectionKind : uint8_t { constexpr static size_t SectionKindsNum = static_cast<size_t>(DebugSectionKind::NumberOfEnumEntries); +static constexpr StringLiteral SectionNames[SectionKindsNum] = { + "debug_info", "debug_line", "debug_frame", "debug_ranges", + "debug_rnglists", "debug_loc", "debug_loclists", "debug_aranges", + "debug_abbrev", "debug_macinfo", "debug_macro", "debug_addr", + "debug_str", "debug_line_str", "debug_str_offsets", "debug_pubnames", + "debug_pubtypes", "debug_names", "apple_names", "apple_namespac", + "apple_objc", "apple_types"}; + +static constexpr const StringLiteral & +getSectionName(DebugSectionKind SectionKind) { + return SectionNames[static_cast<uint8_t>(SectionKind)]; +} + /// Recognise the table name and match it with the DebugSectionKind. std::optional<DebugSectionKind> parseDebugTableName(StringRef Name); -/// Return the name of the section. -const StringLiteral &getSectionName(DebugSectionKind SectionKind); - /// There are fields(sizes, offsets) which should be updated after /// sections are generated. To remember offsets and related data /// the descendants of SectionPatch structure should be used. 
@@ -283,6 +294,8 @@ struct SectionDescriptor { void emitString(dwarf::Form StringForm, const char *StringVal); + void emitBinaryData(llvm::StringRef Data); + /// Emit specified inplace string value into the current section contents. void emitInplaceString(StringRef String) { OS << GlobalData.translateString(String); @@ -496,7 +509,8 @@ protected: SectionsSetTy SectionDescriptors; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_OUTPUTSECTIONS_H diff --git a/llvm/lib/DWARFLinkerParallel/StringEntryToDwarfStringPoolEntryMap.h b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h index b4c74d0adba9..858f224777db 100644 --- a/llvm/lib/DWARFLinkerParallel/StringEntryToDwarfStringPoolEntryMap.h +++ b/llvm/lib/DWARFLinker/Parallel/StringEntryToDwarfStringPoolEntryMap.h @@ -6,15 +6,16 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H -#define LLVM_LIB_DWARFLINKERPARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H #include "DWARFLinkerGlobalData.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DWARFLinkerParallel/StringPool.h" +#include "llvm/DWARFLinker/StringPool.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This class creates a DwarfStringPoolEntry for the corresponding StringEntry. class StringEntryToDwarfStringPoolEntryMap { @@ -66,7 +67,8 @@ protected: LinkingGlobalData &GlobalData; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_STRINGENTRYTODWARFSTRINGPOOLENTRYMAP_H diff --git a/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.cpp b/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.cpp index a9b4478e33c4..1554946c2c04 100644 --- a/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.cpp +++ b/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.cpp @@ -12,8 +12,9 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/Support/ScopedPrinter.h" -namespace llvm { -namespace dwarflinker_parallel { +using namespace llvm; +using namespace dwarf_linker; +using namespace dwarf_linker::parallel; Error SyntheticTypeNameBuilder::assignName( UnitEntryPairTy InputUnitEntryPair, @@ -762,6 +763,3 @@ OrderedChildrenIndexAssigner::getChildIndex( OrderedChildIdxs[*ArrayIndex]++; return Result; } - -} // end of namespace dwarflinker_parallel -} // namespace llvm diff --git a/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.h b/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.h index c9dce4e94fb0..8465c0d77b9c 100644 --- a/llvm/lib/DWARFLinkerParallel/SyntheticTypeNameBuilder.h +++ b/llvm/lib/DWARFLinker/Parallel/SyntheticTypeNameBuilder.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===/ -#ifndef LLVM_LIB_DWARFLINKERNEXT_SYNTHETICTYPENAMEBUILDER_H -#define LLVM_LIB_DWARFLINKERNEXT_SYNTHETICTYPENAMEBUILDER_H +#ifndef 
LLVM_LIB_DWARFLINKER_PARALLEL_SYNTHETICTYPENAMEBUILDER_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_SYNTHETICTYPENAMEBUILDER_H #include "DWARFLinkerCompileUnit.h" #include "DWARFLinkerGlobalData.h" @@ -17,7 +17,8 @@ namespace llvm { class DWARFDebugInfoEntry; -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { struct LinkContext; class TypeTableUnit; class CompileUnit; @@ -149,7 +150,8 @@ protected: OrderedChildrenIndexesArrayTy ChildIndexesWidth = {0}; }; -} // end namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERNEXT_SYNTHETICTYPENAMEBUILDER_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_SYNTHETICTYPENAMEBUILDER_H diff --git a/llvm/lib/DWARFLinkerParallel/TypePool.h b/llvm/lib/DWARFLinker/Parallel/TypePool.h index bbb3261027ce..547532977262 100644 --- a/llvm/lib/DWARFLinkerParallel/TypePool.h +++ b/llvm/lib/DWARFLinker/Parallel/TypePool.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DWARFLINKERPARALLEL_TYPEPOOL_H -#define LLVM_DWARFLINKERPARALLEL_TYPEPOOL_H +#ifndef LLVM_DWARFLINKER_PARALLEL_TYPEPOOL_H +#define LLVM_DWARFLINKER_PARALLEL_TYPEPOOL_H #include "ArrayList.h" #include "llvm/ADT/ConcurrentHashtable.h" @@ -17,7 +17,8 @@ #include <atomic> namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { class TypePool; class CompileUnit; @@ -41,7 +42,8 @@ public: bool hasOnlyDeclaration() const { return Die == nullptr; } /// Creates type DIE for the specified name. - static TypeEntryBody *create(parallel::PerThreadBumpPtrAllocator &Allocator) { + static TypeEntryBody * + create(llvm::parallel::PerThreadBumpPtrAllocator &Allocator) { TypeEntryBody *Result = Allocator.Allocate<TypeEntryBody>(); new (Result) TypeEntryBody(Allocator); return Result; @@ -72,7 +74,7 @@ protected: TypeEntryBody &operator=(const TypeEntryBody &RHS) = delete; TypeEntryBody &operator=(const TypeEntryBody &&RHS) = delete; - TypeEntryBody(parallel::PerThreadBumpPtrAllocator &Allocator) + TypeEntryBody(llvm::parallel::PerThreadBumpPtrAllocator &Allocator) : Children(&Allocator) {} }; @@ -95,20 +97,22 @@ public: /// \returns newly created object of KeyDataTy type. static inline TypeEntry * - create(const StringRef &Key, parallel::PerThreadBumpPtrAllocator &Allocator) { + create(const StringRef &Key, + llvm::parallel::PerThreadBumpPtrAllocator &Allocator) { return TypeEntry::create(Key, Allocator); } }; /// TypePool keeps type descriptors which contain partially cloned DIE /// correspinding to each type. Types are identified by names. 
-class TypePool : ConcurrentHashTableByPtr<StringRef, TypeEntry, - parallel::PerThreadBumpPtrAllocator, - TypeEntryInfo> { +class TypePool + : ConcurrentHashTableByPtr<StringRef, TypeEntry, + llvm::parallel::PerThreadBumpPtrAllocator, + TypeEntryInfo> { public: TypePool() : ConcurrentHashTableByPtr<StringRef, TypeEntry, - parallel::PerThreadBumpPtrAllocator, + llvm::parallel::PerThreadBumpPtrAllocator, TypeEntryInfo>(Allocator) { Root = TypeEntry::create("", Allocator); Root->getValue().store(TypeEntryBody::create(Allocator)); @@ -116,7 +120,7 @@ public: TypeEntry *insert(StringRef Name) { return ConcurrentHashTableByPtr<StringRef, TypeEntry, - parallel::PerThreadBumpPtrAllocator, + llvm::parallel::PerThreadBumpPtrAllocator, TypeEntryInfo>::insert(Name) .first; } @@ -168,10 +172,11 @@ protected: TypeEntry *Root = nullptr; private: - parallel::PerThreadBumpPtrAllocator Allocator; + llvm::parallel::PerThreadBumpPtrAllocator Allocator; }; -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_DWARFLINKERPARALLEL_TYPEPOOL_H +#endif // LLVM_DWARFLINKER_PARALLEL_TYPEPOOL_H diff --git a/llvm/lib/DWARFLinkerParallel/Utils.h b/llvm/lib/DWARFLinker/Parallel/Utils.h index 91f9dca46a82..3c05b2ea173d 100644 --- a/llvm/lib/DWARFLinkerParallel/Utils.h +++ b/llvm/lib/DWARFLinker/Parallel/Utils.h @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_DWARFLINKERPARALLEL_UTILS_H -#define LLVM_LIB_DWARFLINKERPARALLEL_UTILS_H +#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H +#define LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H #include "llvm/Support/Error.h" namespace llvm { -namespace dwarflinker_parallel { +namespace dwarf_linker { +namespace parallel { /// This function calls \p Iteration() until it returns false. /// If number of iterations exceeds \p MaxCounter then an Error is returned. @@ -34,7 +35,8 @@ inline Error finiteLoop(function_ref<Expected<bool>()> Iteration, return createStringError(std::errc::invalid_argument, "Infinite recursion"); } -} // end of namespace dwarflinker_parallel -} // end namespace llvm +} // end of namespace parallel +} // end of namespace dwarf_linker +} // end of namespace llvm -#endif // LLVM_LIB_DWARFLINKERPARALLEL_UTILS_H +#endif // LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H diff --git a/llvm/lib/DWARFLinkerParallel/StringPool.cpp b/llvm/lib/DWARFLinker/Utils.cpp index fbff6b05e3a5..e8b0fe303aae 100644 --- a/llvm/lib/DWARFLinkerParallel/StringPool.cpp +++ b/llvm/lib/DWARFLinker/Utils.cpp @@ -1,9 +1,7 @@ -//=== StringPool.cpp ------------------------------------------------------===// +//===- Utils.cpp ------------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -#include "llvm/DWARFLinkerParallel/StringPool.h" diff --git a/llvm/lib/DWARFLinkerParallel/DWARFFile.cpp b/llvm/lib/DWARFLinkerParallel/DWARFFile.cpp deleted file mode 100644 index 5a3486e6398d..000000000000 --- a/llvm/lib/DWARFLinkerParallel/DWARFFile.cpp +++ /dev/null @@ -1,17 +0,0 @@ -//=== DWARFFile.cpp -------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/DWARFLinkerParallel/DWARFFile.h" -#include "DWARFLinkerGlobalData.h" - -llvm::dwarflinker_parallel::DWARFFile::DWARFFile( - StringRef Name, std::unique_ptr<DWARFContext> Dwarf, - std::unique_ptr<AddressesMap> Addresses, - DWARFFile::UnloadCallbackTy UnloadFunc) - : FileName(Name), Dwarf(std::move(Dwarf)), Addresses(std::move(Addresses)), - UnloadFunc(UnloadFunc) {} diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 43ed60d7f977..d25b732fdba3 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -940,8 +940,13 @@ void DWARFVerifier::verifyDebugLineRows() { OS << '\n'; } - // Verify file index. - if (!LineTable->hasFileAtIndex(Row.File)) { + // If the prologue contains no file names and the line table has only one + // row, do not verify the file index, this is a line table of an empty + // file with an end_sequence, but the DWARF standard sets the file number + // to 1 by default, otherwise verify file index. + if ((LineTable->Prologue.FileNames.size() || + LineTable->Rows.size() != 1) && + !LineTable->hasFileAtIndex(Row.File)) { ++NumDebugLineErrors; error() << ".debug_line[" << format("0x%08" PRIx64, diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp index 83f3cdc88c01..117b849d1c78 100644 --- a/llvm/lib/Demangle/Demangle.cpp +++ b/llvm/lib/Demangle/Demangle.cpp @@ -47,7 +47,8 @@ static bool isRustEncoding(std::string_view S) { return starts_with(S, "_R"); } static bool isDLangEncoding(std::string_view S) { return starts_with(S, "_D"); } bool llvm::nonMicrosoftDemangle(std::string_view MangledName, - std::string &Result, bool CanHaveLeadingDot) { + std::string &Result, bool CanHaveLeadingDot, + bool ParseParams) { char *Demangled = nullptr; // Do not consider the dot prefix as part of the demangled symbol name. 
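// Editor's sketch (not part of this commit): calling the demangling entry point
// with the new ParseParams flag threaded through above. Passing false asks the
// Itanium demangler to stop before the function parameter list, so only the
// (shorter) name is produced; true preserves the previous behaviour. The helper
// name below is hypothetical.
#include "llvm/Demangle/Demangle.h"
#include <string>
#include <string_view>

static std::string demangleWithoutParams(std::string_view Mangled) {
  std::string Out;
  if (llvm::nonMicrosoftDemangle(Mangled, Out, /*CanHaveLeadingDot=*/true,
                                 /*ParseParams=*/false))
    return Out;
  return std::string(Mangled); // Not a recognized mangling; return as-is.
}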
@@ -57,7 +58,7 @@ bool llvm::nonMicrosoftDemangle(std::string_view MangledName, } if (isItaniumEncoding(MangledName)) - Demangled = itaniumDemangle(MangledName); + Demangled = itaniumDemangle(MangledName, ParseParams); else if (isRustEncoding(MangledName)) Demangled = rustDemangle(MangledName); else if (isDLangEncoding(MangledName)) diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp index e3f208f0adf8..5c21b06a1d09 100644 --- a/llvm/lib/Demangle/ItaniumDemangle.cpp +++ b/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -366,13 +366,13 @@ public: using Demangler = itanium_demangle::ManglingParser<DefaultAllocator>; -char *llvm::itaniumDemangle(std::string_view MangledName) { +char *llvm::itaniumDemangle(std::string_view MangledName, bool ParseParams) { if (MangledName.empty()) return nullptr; Demangler Parser(MangledName.data(), MangledName.data() + MangledName.length()); - Node *AST = Parser.parse(); + Node *AST = Parser.parse(ParseParams); if (!AST) return nullptr; diff --git a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 770fc9349083..ae978070ac9f 100644 --- a/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1074,7 +1074,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, assert(BitWidth == 64 && "Invalid index type for getelementptr"); Idx = (int64_t)IdxGV.IntVal.getZExtValue(); } - Total += getDataLayout().getTypeAllocSize(I.getIndexedType()) * Idx; + Total += I.getSequentialElementStride(getDataLayout()) * Idx; } } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index d86ceb99ded0..7f743dba60a9 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -468,6 +468,41 @@ createLinkGraphFromObject(MemoryBufferRef ObjectBuffer) { }; } +std::unique_ptr<LinkGraph> absoluteSymbolsLinkGraph(const Triple &TT, + orc::SymbolMap Symbols) { + unsigned PointerSize; + endianness Endianness = + TT.isLittleEndian() ? 
endianness::little : endianness::big; + switch (TT.getArch()) { + case Triple::aarch64: + case llvm::Triple::riscv64: + case Triple::x86_64: + PointerSize = 8; + break; + case llvm::Triple::arm: + case llvm::Triple::riscv32: + case llvm::Triple::x86: + PointerSize = 4; + break; + default: + llvm::report_fatal_error("unhandled target architecture"); + } + + static std::atomic<uint64_t> Counter = {0}; + auto Index = Counter.fetch_add(1, std::memory_order_relaxed); + auto G = std::make_unique<LinkGraph>( + "<Absolute Symbols " + std::to_string(Index) + ">", TT, PointerSize, + Endianness, /*GetEdgeKindName=*/nullptr); + for (auto &[Name, Def] : Symbols) { + auto &Sym = + G->addAbsoluteSymbol(*Name, Def.getAddress(), /*Size=*/0, + Linkage::Strong, Scope::Default, /*IsLive=*/true); + Sym.setCallable(Def.getFlags().isCallable()); + } + + return G; +} + void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx) { switch (G->getTargetTriple().getObjectFormat()) { case Triple::MachO: diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 5361272ae79e..01144763ac4c 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -48,6 +48,14 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) { if (auto Err = runPasses(Passes.PostPrunePasses)) return Ctx->notifyFailed(std::move(Err)); + // Skip straight to phase 2 if the graph is empty with no associated actions. + if (G->allocActions().empty() && llvm::all_of(G->sections(), [](Section &S) { + return S.getMemLifetime() == orc::MemLifetime::NoAlloc; + })) { + linkPhase2(std::move(Self), nullptr); + return; + } + Ctx->getMemoryManager().allocate( Ctx->getJITLinkDylib(), *G, [S = std::move(Self)](AllocResult AR) mutable { @@ -163,6 +171,12 @@ void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, if (auto Err = runPasses(Passes.PostFixupPasses)) return abandonAllocAndBailOut(std::move(Self), std::move(Err)); + // Skip straight to phase 4 if the graph has no allocation. 
+ if (!Alloc) { + linkPhase4(std::move(Self), JITLinkMemoryManager::FinalizedAlloc{}); + return; + } + Alloc->finalize([S = std::move(Self)](FinalizeResult FR) mutable { // FIXME: Once MSVC implements c++17 order of evaluation rules for calls // this can be simplified to diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp index b8969de54936..acd7e5a409fc 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp @@ -45,7 +45,8 @@ Expected<std::unique_ptr<EPCDebugObjectRegistrar>> createJITLoaderGDBRegistrar( assert((*Result)[0].size() == 1 && "Unexpected number of addresses in result"); - return std::make_unique<EPCDebugObjectRegistrar>(ES, (*Result)[0][0]); + ExecutorAddr RegisterAddr = (*Result)[0][0].getAddress(); + return std::make_unique<EPCDebugObjectRegistrar>(ES, RegisterAddr); } Error EPCDebugObjectRegistrar::registerDebugObject(ExecutorAddrRange TargetMem, diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp index 46e16a55c7e1..460f4e1c448e 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp @@ -12,15 +12,15 @@ namespace llvm { namespace orc { Expected<std::unique_ptr<EPCDynamicLibrarySearchGenerator>> -EPCDynamicLibrarySearchGenerator::Load(ExecutionSession &ES, - const char *LibraryPath, - SymbolPredicate Allow) { +EPCDynamicLibrarySearchGenerator::Load( + ExecutionSession &ES, const char *LibraryPath, SymbolPredicate Allow, + AddAbsoluteSymbolsFn AddAbsoluteSymbols) { auto Handle = ES.getExecutorProcessControl().loadDylib(LibraryPath); if (!Handle) return Handle.takeError(); - return std::make_unique<EPCDynamicLibrarySearchGenerator>(ES, *Handle, - std::move(Allow)); + return std::make_unique<EPCDynamicLibrarySearchGenerator>( + ES, *Handle, std::move(Allow), std::move(AddAbsoluteSymbols)); } Error EPCDynamicLibrarySearchGenerator::tryToGenerate( @@ -52,8 +52,8 @@ Error EPCDynamicLibrarySearchGenerator::tryToGenerate( auto ResultI = Result->front().begin(); for (auto &KV : LookupSymbols) { - if (*ResultI) - NewSymbols[KV.first] = {*ResultI, JITSymbolFlags::Exported}; + if (ResultI->getAddress()) + NewSymbols[KV.first] = *ResultI; ++ResultI; } @@ -62,6 +62,8 @@ Error EPCDynamicLibrarySearchGenerator::tryToGenerate( return Error::success(); // Define resolved symbols. 
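// Editor's sketch (not part of this commit): what the new AddAbsoluteSymbols
// hook is for. When the callback is supplied, resolved dylib symbols are handed
// to it instead of being defined via absoluteSymbols(); a typical client turns
// them into an absolute-symbols LinkGraph (absoluteSymbolsLinkGraph, also added
// in this patch) so JITLink plugins see them too. The library path is only an
// example, the helper name is hypothetical, and the
// ObjectLinkingLayer::add(JITDylib &, std::unique_ptr<LinkGraph>) overload is
// assumed to be available.
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"

static llvm::Error addHostLibrary(llvm::orc::ExecutionSession &ES,
                                  llvm::orc::ObjectLinkingLayer &ObjLayer,
                                  llvm::orc::JITDylib &MainJD) {
  auto Gen = llvm::orc::EPCDynamicLibrarySearchGenerator::Load(
      ES, "/usr/lib/libc.so.7", /*Allow=*/{},
      /*AddAbsoluteSymbols=*/[&](llvm::orc::JITDylib &JD,
                                 llvm::orc::SymbolMap Symbols) -> llvm::Error {
        // Materialize the resolved addresses as an absolute-symbols graph
        // rather than defining them directly on the JITDylib.
        return ObjLayer.add(JD, llvm::jitlink::absoluteSymbolsLinkGraph(
                                    ES.getTargetTriple(), std::move(Symbols)));
      });
  if (!Gen)
    return Gen.takeError();
  MainJD.addGenerator(std::move(*Gen));
  return llvm::Error::success();
}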
+ if (AddAbsoluteSymbols) + return AddAbsoluteSymbols(JD, std::move(NewSymbols)); return JD.define(absoluteSymbols(std::move(NewSymbols))); } diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp index e70749cdfab2..da185c80c6c7 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp @@ -81,10 +81,11 @@ Expected<tpctypes::DylibHandle> EPCGenericDylibManager::open(StringRef Path, return H; } -Expected<std::vector<ExecutorAddr>> +Expected<std::vector<ExecutorSymbolDef>> EPCGenericDylibManager::lookup(tpctypes::DylibHandle H, const SymbolLookupSet &Lookup) { - Expected<std::vector<ExecutorAddr>> Result((std::vector<ExecutorAddr>())); + Expected<std::vector<ExecutorSymbolDef>> Result( + (std::vector<ExecutorSymbolDef>())); if (auto Err = EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerLookupSignature>( SAs.Lookup, Result, SAs.Instance, H, Lookup)) @@ -92,10 +93,11 @@ EPCGenericDylibManager::lookup(tpctypes::DylibHandle H, return Result; } -Expected<std::vector<ExecutorAddr>> +Expected<std::vector<ExecutorSymbolDef>> EPCGenericDylibManager::lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &Lookup) { - Expected<std::vector<ExecutorAddr>> Result((std::vector<ExecutorAddr>())); + Expected<std::vector<ExecutorSymbolDef>> Result( + (std::vector<ExecutorSymbolDef>())); if (auto Err = EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerLookupSignature>( SAs.Lookup, Result, SAs.Instance, H, Lookup)) diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index 8d5608cc4d4c..3952445bb1aa 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -218,19 +218,23 @@ void ItaniumCXAAtExitSupport::runAtExits(void *DSOHandle) { } DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator( - sys::DynamicLibrary Dylib, char GlobalPrefix, SymbolPredicate Allow) + sys::DynamicLibrary Dylib, char GlobalPrefix, SymbolPredicate Allow, + AddAbsoluteSymbolsFn AddAbsoluteSymbols) : Dylib(std::move(Dylib)), Allow(std::move(Allow)), + AddAbsoluteSymbols(std::move(AddAbsoluteSymbols)), GlobalPrefix(GlobalPrefix) {} Expected<std::unique_ptr<DynamicLibrarySearchGenerator>> DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix, - SymbolPredicate Allow) { + SymbolPredicate Allow, + AddAbsoluteSymbolsFn AddAbsoluteSymbols) { std::string ErrMsg; auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg); if (!Lib.isValid()) return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode()); return std::make_unique<DynamicLibrarySearchGenerator>( - std::move(Lib), GlobalPrefix, std::move(Allow)); + std::move(Lib), GlobalPrefix, std::move(Allow), + std::move(AddAbsoluteSymbols)); } Error DynamicLibrarySearchGenerator::tryToGenerate( @@ -261,6 +265,8 @@ Error DynamicLibrarySearchGenerator::tryToGenerate( if (NewSymbols.empty()) return Error::success(); + if (AddAbsoluteSymbols) + return AddAbsoluteSymbols(JD, std::move(NewSymbols)); return JD.define(absoluteSymbols(std::move(NewSymbols))); } diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp index ad27deff38d9..f0c551cd7780 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp @@ -95,7 +95,7 @@ 
SelfExecutorProcessControl::lookupSymbols(ArrayRef<LookupRequest> Request) { for (auto &Elem : Request) { sys::DynamicLibrary Dylib(Elem.Handle.toPtr<void *>()); - R.push_back(std::vector<ExecutorAddr>()); + R.push_back(std::vector<ExecutorSymbolDef>()); for (auto &KV : Elem.Symbols) { auto &Sym = KV.first; std::string Tmp((*Sym).data() + !!GlobalManglingPrefix, @@ -107,7 +107,9 @@ SelfExecutorProcessControl::lookupSymbols(ArrayRef<LookupRequest> Request) { MissingSymbols.push_back(Sym); return make_error<SymbolsNotFound>(SSP, std::move(MissingSymbols)); } - R.back().push_back(ExecutorAddr::fromPtr(Addr)); + // FIXME: determine accurate JITSymbolFlags. + R.back().push_back( + {ExecutorAddr::fromPtr(Addr), JITSymbolFlags::Exported}); } } diff --git a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp index 75075c5c2a22..a369e1b53382 100644 --- a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp @@ -73,7 +73,7 @@ Error lookupAndRecordAddrs( inconvertibleErrorCode()); for (unsigned I = 0; I != Pairs.size(); ++I) - *Pairs[I].second = Result->front()[I]; + *Pairs[I].second = Result->front()[I].getAddress(); return Error::success(); } diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 9057300bf043..6c17f14aa4c7 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -1608,6 +1608,8 @@ Error MachOPlatform::MachOPlatformPlugin::prepareSymbolTableRegistration( SmallVector<jitlink::Symbol *> SymsToProcess; for (auto *Sym : G.defined_symbols()) SymsToProcess.push_back(Sym); + for (auto *Sym : G.absolute_symbols()) + SymsToProcess.push_back(Sym); for (auto *Sym : SymsToProcess) { if (!Sym->hasName()) diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 3d77f82e6569..b8282948034e 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -93,15 +93,20 @@ private: Interface LGI; - for (auto *Sym : G.defined_symbols()) { + auto AddSymbol = [&](Symbol *Sym) { // Skip local symbols. 
if (Sym->getScope() == Scope::Local) - continue; + return; assert(Sym->hasName() && "Anonymous non-local symbol?"); LGI.SymbolFlags[ES.intern(Sym->getName())] = getJITSymbolFlagsForSymbol(*Sym); - } + }; + + for (auto *Sym : G.defined_symbols()) + AddSymbol(Sym); + for (auto *Sym : G.absolute_symbols()) + AddSymbol(Sym); if (hasInitializerSection(G)) LGI.InitSymbol = makeInitSymbol(ES, G); @@ -705,6 +710,9 @@ Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR, if (Err) return Err; + if (!FA) + return Error::success(); + return MR.withResourceKeyDo( [&](ResourceKey K) { Allocs[K].push_back(std::move(FA)); }); } diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp index 8eca874c48b8..8a4145a6b02a 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp @@ -21,31 +21,8 @@ // First version as landed in August 2009 static constexpr uint32_t JitDescriptorVersion = 1; -// Keep in sync with gdb/gdb/jit.h extern "C" { -typedef enum { - JIT_NOACTION = 0, - JIT_REGISTER_FN, - JIT_UNREGISTER_FN -} jit_actions_t; - -struct jit_code_entry { - struct jit_code_entry *next_entry; - struct jit_code_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -}; - -struct jit_descriptor { - uint32_t version; - // This should be jit_actions_t, but we want to be specific about the - // bit-width. - uint32_t action_flag; - struct jit_code_entry *relevant_entry; - struct jit_code_entry *first_entry; -}; - // We put information about the JITed function in this global, which the // debugger reads. Make sure to specify the version statically, because the // debugger checks the version before we can set it during runtime. diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp index cb11b68e2719..b7e256a826ca 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp @@ -40,10 +40,10 @@ SimpleExecutorDylibManager::open(const std::string &Path, uint64_t Mode) { return H; } -Expected<std::vector<ExecutorAddr>> +Expected<std::vector<ExecutorSymbolDef>> SimpleExecutorDylibManager::lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &L) { - std::vector<ExecutorAddr> Result; + std::vector<ExecutorSymbolDef> Result; auto DL = sys::DynamicLibrary(H.toPtr<void *>()); for (const auto &E : L) { @@ -52,7 +52,7 @@ SimpleExecutorDylibManager::lookup(tpctypes::DylibHandle H, return make_error<StringError>("Required address for empty symbol \"\"", inconvertibleErrorCode()); else - Result.push_back(ExecutorAddr()); + Result.push_back(ExecutorSymbolDef()); } else { const char *DemangledSymName = E.Name.c_str(); @@ -70,7 +70,8 @@ SimpleExecutorDylibManager::lookup(tpctypes::DylibHandle H, DemangledSymName, inconvertibleErrorCode()); - Result.push_back(ExecutorAddr::fromPtr(Addr)); + // FIXME: determine accurate JITSymbolFlags. + Result.push_back({ExecutorAddr::fromPtr(Addr), JITSymbolFlags::Exported}); } } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 95cdec722062..278cdfce4110 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -859,6 +859,9 @@ private: /// Add all of the metadata from an instruction. 
void processInstructionMetadata(const Instruction &I); + + /// Add all of the metadata from an instruction. + void processDPValueMetadata(const DPValue &DPV); }; } // end namespace llvm @@ -1126,11 +1129,19 @@ void SlotTracker::processGlobalObjectMetadata(const GlobalObject &GO) { void SlotTracker::processFunctionMetadata(const Function &F) { processGlobalObjectMetadata(F); for (auto &BB : F) { - for (auto &I : BB) + for (auto &I : BB) { + for (const DPValue &DPV : I.getDbgValueRange()) + processDPValueMetadata(DPV); processInstructionMetadata(I); + } } } +void SlotTracker::processDPValueMetadata(const DPValue &DPV) { + CreateMetadataSlot(DPV.getVariable()); + CreateMetadataSlot(DPV.getDebugLoc()); +} + void SlotTracker::processInstructionMetadata(const Instruction &I) { // Process metadata used directly by intrinsics. if (const CallInst *CI = dyn_cast<CallInst>(&I)) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 6b54047020a0..3a3b41fb786c 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -621,6 +621,284 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, return false; } +// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so. +// IsArm: 'arm.*', !IsArm: 'aarch64.*'. +static bool UpgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, + StringRef Name, + Function *&NewFn) { + if (Name.starts_with("rbit")) { + // '(arm|aarch64).rbit'. + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, + F->arg_begin()->getType()); + return true; + } + + if (Name == "thread.pointer") { + // '(arm|aarch64).thread.pointer'. + NewFn = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); + return true; + } + + bool Neon = Name.consume_front("neon."); + if (Neon) { + // '(arm|aarch64).neon.*'. + // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and + // v16i8 respectively. + if (Name.consume_front("bfdot.")) { + // (arm|aarch64).neon.bfdot.*'. + Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) + .Cases("v2f32.v8i8", "v4f32.v16i8", + IsArm ? Intrinsic::arm_neon_bfdot + : Intrinsic::aarch64_neon_bfdot) + .Default(Intrinsic::not_intrinsic); + if (ID != Intrinsic::not_intrinsic) { + size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits(); + assert((OperandWidth == 64 || OperandWidth == 128) && + "Unexpected operand width"); + LLVMContext &Ctx = F->getParent()->getContext(); + std::array<Type *, 2> Tys{ + {F->getReturnType(), + FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}}; + NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys); + return true; + } + return false; // No other '(arm|aarch64).neon.bfdot.*'. + } + + // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic + // anymore and accept v8bf16 instead of v16i8. + if (Name.consume_front("bfm")) { + // (arm|aarch64).neon.bfm*'. + if (Name.consume_back(".v4f32.v16i8")) { + // (arm|aarch64).neon.bfm*.v4f32.v16i8'. + Intrinsic::ID ID = + StringSwitch<Intrinsic::ID>(Name) + .Case("mla", IsArm ? Intrinsic::arm_neon_bfmmla + : Intrinsic::aarch64_neon_bfmmla) + .Case("lalb", IsArm ? Intrinsic::arm_neon_bfmlalb + : Intrinsic::aarch64_neon_bfmlalb) + .Case("lalt", IsArm ? Intrinsic::arm_neon_bfmlalt + : Intrinsic::aarch64_neon_bfmlalt) + .Default(Intrinsic::not_intrinsic); + if (ID != Intrinsic::not_intrinsic) { + NewFn = Intrinsic::getDeclaration(F->getParent(), ID); + return true; + } + return false; // No other '(arm|aarch64).neon.bfm*.v16i8'. 
+ } + return false; // No other '(arm|aarch64).neon.bfm*. + } + // Continue on to Aarch64 Neon or Arm Neon. + } + // Continue on to Arm or Aarch64. + + if (IsArm) { + // 'arm.*'. + if (Neon) { + // 'arm.neon.*'. + Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) + .StartsWith("vclz.", Intrinsic::ctlz) + .StartsWith("vcnt.", Intrinsic::ctpop) + .StartsWith("vqadds.", Intrinsic::sadd_sat) + .StartsWith("vqaddu.", Intrinsic::uadd_sat) + .StartsWith("vqsubs.", Intrinsic::ssub_sat) + .StartsWith("vqsubu.", Intrinsic::usub_sat) + .Default(Intrinsic::not_intrinsic); + if (ID != Intrinsic::not_intrinsic) { + NewFn = Intrinsic::getDeclaration(F->getParent(), ID, + F->arg_begin()->getType()); + return true; + } + + if (Name.consume_front("vst")) { + // 'arm.neon.vst*'. + static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$"); + SmallVector<StringRef, 2> Groups; + if (vstRegex.match(Name, &Groups)) { + static const Intrinsic::ID StoreInts[] = { + Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2, + Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4}; + + static const Intrinsic::ID StoreLaneInts[] = { + Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, + Intrinsic::arm_neon_vst4lane}; + + auto fArgs = F->getFunctionType()->params(); + Type *Tys[] = {fArgs[0], fArgs[1]}; + if (Groups[1].size() == 1) + NewFn = Intrinsic::getDeclaration(F->getParent(), + StoreInts[fArgs.size() - 3], Tys); + else + NewFn = Intrinsic::getDeclaration( + F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys); + return true; + } + return false; // No other 'arm.neon.vst*'. + } + + return false; // No other 'arm.neon.*'. + } + + if (Name.consume_front("mve.")) { + // 'arm.mve.*'. + if (Name == "vctp64") { + if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) { + // A vctp64 returning a v4i1 is converted to return a v2i1. Rename + // the function and deal with it below in UpgradeIntrinsicCall. + rename(F); + return true; + } + return false; // Not 'arm.mve.vctp64'. + } + + // These too are changed to accept a v2i1 instead of the old v4i1. + if (Name.consume_back(".v4i1")) { + // 'arm.mve.*.v4i1'. + if (Name.consume_back(".predicated.v2i64.v4i32")) + // 'arm.mve.*.predicated.v2i64.v4i32.v4i1' + return Name == "mull.int" || Name == "vqdmull"; + + if (Name.consume_back(".v2i64")) { + // 'arm.mve.*.v2i64.v4i1' + bool IsGather = Name.consume_front("vldr.gather."); + if (IsGather || Name.consume_front("vstr.scatter.")) { + if (Name.consume_front("base.")) { + // Optional 'wb.' prefix. + Name.consume_front("wb."); + // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)? + // predicated.v2i64.v2i64.v4i1'. + return Name == "predicated.v2i64"; + } + + if (Name.consume_front("offset.predicated.")) + return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") || + Name == (IsGather ? "v2i64.p0" : "p0.v2i64"); + + // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'. + return false; + } + + return false; // No other 'arm.mve.*.v2i64.v4i1'. + } + return false; // No other 'arm.mve.*.v4i1'. + } + return false; // No other 'arm.mve.*'. + } + + if (Name.consume_front("cde.vcx")) { + // 'arm.cde.vcx*'. + if (Name.consume_back(".predicated.v2i64.v4i1")) + // 'arm.cde.vcx*.predicated.v2i64.v4i1'. + return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" || + Name == "3q" || Name == "3qa"; + + return false; // No other 'arm.cde.vcx*'. + } + } else { + // 'aarch64.*'. + if (Neon) { + // 'aarch64.neon.*'. 
+ Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name) + .StartsWith("frintn", Intrinsic::roundeven) + .StartsWith("rbit", Intrinsic::bitreverse) + .Default(Intrinsic::not_intrinsic); + if (ID != Intrinsic::not_intrinsic) { + NewFn = Intrinsic::getDeclaration(F->getParent(), ID, + F->arg_begin()->getType()); + return true; + } + + if (Name.starts_with("addp")) { + // 'aarch64.neon.addp*'. + if (F->arg_size() != 2) + return false; // Invalid IR. + VectorType *Ty = dyn_cast<VectorType>(F->getReturnType()); + if (Ty && Ty->getElementType()->isFloatingPointTy()) { + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::aarch64_neon_faddp, Ty); + return true; + } + } + return false; // No other 'aarch64.neon.*'. + } + if (Name.consume_front("sve.")) { + // 'aarch64.sve.*'. + if (Name.consume_front("bf")) { + if (Name.consume_back(".lane")) { + // 'aarch64.sve.bf*.lane'. + Intrinsic::ID ID = + StringSwitch<Intrinsic::ID>(Name) + .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2) + .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2) + .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2) + .Default(Intrinsic::not_intrinsic); + if (ID != Intrinsic::not_intrinsic) { + NewFn = Intrinsic::getDeclaration(F->getParent(), ID); + return true; + } + return false; // No other 'aarch64.sve.bf*.lane'. + } + return false; // No other 'aarch64.sve.bf*'. + } + + if (Name.consume_front("ld")) { + // 'aarch64.sve.ld*'. + static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)"); + if (LdRegex.match(Name)) { + Type *ScalarTy = + dyn_cast<VectorType>(F->getReturnType())->getElementType(); + ElementCount EC = dyn_cast<VectorType>(F->arg_begin()->getType()) + ->getElementCount(); + Type *Ty = VectorType::get(ScalarTy, EC); + static const Intrinsic::ID LoadIDs[] = { + Intrinsic::aarch64_sve_ld2_sret, + Intrinsic::aarch64_sve_ld3_sret, + Intrinsic::aarch64_sve_ld4_sret, + }; + NewFn = Intrinsic::getDeclaration(F->getParent(), + LoadIDs[Name[0] - '2'], Ty); + return true; + } + return false; // No other 'aarch64.sve.ld*'. + } + + if (Name.consume_front("tuple.")) { + // 'aarch64.sve.tuple.*'. + if (Name.starts_with("get")) { + // 'aarch64.sve.tuple.get*'. + Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::vector_extract, Tys); + return true; + } + + if (Name.starts_with("set")) { + // 'aarch64.sve.tuple.set*'. + auto Args = F->getFunctionType()->params(); + Type *Tys[] = {Args[0], Args[2], Args[1]}; + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::vector_insert, Tys); + return true; + } + + static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)"); + if (CreateTupleRegex.match(Name)) { + // 'aarch64.sve.tuple.create*'. + auto Args = F->getFunctionType()->params(); + Type *Tys[] = {F->getReturnType(), Args[1]}; + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::vector_insert, Tys); + return true; + } + return false; // No other 'aarch64.sve.tuple.*'. + } + return false; // No other 'aarch64.sve.*'. + } + } + return false; // No other 'arm.*', 'aarch64.*'. 
+} + static Intrinsic::ID ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name) { if (Name.consume_front("abs.")) return StringSwitch<Intrinsic::ID>(Name) @@ -713,225 +991,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { switch (Name[0]) { default: break; case 'a': { - if (Name.starts_with("arm.rbit") || Name.starts_with("aarch64.rbit")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, - F->arg_begin()->getType()); - return true; - } - if (Name.starts_with("aarch64.neon.frintn")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven, - F->arg_begin()->getType()); - return true; - } - if (Name.starts_with("aarch64.neon.rbit")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, - F->arg_begin()->getType()); - return true; - } - if (Name == "aarch64.sve.bfdot.lane") { - NewFn = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::aarch64_sve_bfdot_lane_v2); - return true; - } - if (Name == "aarch64.sve.bfmlalb.lane") { - NewFn = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::aarch64_sve_bfmlalb_lane_v2); - return true; - } - if (Name == "aarch64.sve.bfmlalt.lane") { - NewFn = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::aarch64_sve_bfmlalt_lane_v2); - return true; - } - static const Regex LdRegex("^aarch64\\.sve\\.ld[234](.nxv[a-z0-9]+|$)"); - if (LdRegex.match(Name)) { - Type *ScalarTy = - dyn_cast<VectorType>(F->getReturnType())->getElementType(); - ElementCount EC = - dyn_cast<VectorType>(F->arg_begin()->getType())->getElementCount(); - Type *Ty = VectorType::get(ScalarTy, EC); - Intrinsic::ID ID = - StringSwitch<Intrinsic::ID>(Name) - .StartsWith("aarch64.sve.ld2", Intrinsic::aarch64_sve_ld2_sret) - .StartsWith("aarch64.sve.ld3", Intrinsic::aarch64_sve_ld3_sret) - .StartsWith("aarch64.sve.ld4", Intrinsic::aarch64_sve_ld4_sret) - .Default(Intrinsic::not_intrinsic); - NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty); - return true; - } - if (Name.starts_with("aarch64.sve.tuple.get")) { - Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; - NewFn = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::vector_extract, Tys); - return true; - } - if (Name.starts_with("aarch64.sve.tuple.set")) { - auto Args = F->getFunctionType()->params(); - Type *Tys[] = {Args[0], Args[2], Args[1]}; - NewFn = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::vector_insert, Tys); - return true; - } - static const Regex CreateTupleRegex( - "^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)"); - if (CreateTupleRegex.match(Name)) { - auto Args = F->getFunctionType()->params(); - Type *Tys[] = {F->getReturnType(), Args[1]}; - NewFn = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::vector_insert, Tys); - return true; - } - if (Name.starts_with("arm.neon.vclz")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, - F->arg_begin()->getType()); - return true; - } - if (Name.starts_with("arm.neon.vcnt")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, - F->arg_begin()->getType()); - return true; - } - static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); - if (vstRegex.match(Name)) { - static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, - Intrinsic::arm_neon_vst2, - Intrinsic::arm_neon_vst3, - Intrinsic::arm_neon_vst4}; - - static const Intrinsic::ID StoreLaneInts[] = { - Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, - Intrinsic::arm_neon_vst4lane - }; - - auto fArgs = 
F->getFunctionType()->params(); - Type *Tys[] = {fArgs[0], fArgs[1]}; - if (!Name.contains("lane")) - NewFn = Intrinsic::getDeclaration(F->getParent(), - StoreInts[fArgs.size() - 3], Tys); - else - NewFn = Intrinsic::getDeclaration(F->getParent(), - StoreLaneInts[fArgs.size() - 5], Tys); - return true; - } - if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); - return true; - } - if (Name.starts_with("arm.neon.vqadds.")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat, - F->arg_begin()->getType()); - return true; - } - if (Name.starts_with("arm.neon.vqaddu.")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat, - F->arg_begin()->getType()); - return true; - } - if (Name.starts_with("arm.neon.vqsubs.")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat, - F->arg_begin()->getType()); - return true; - } - if (Name.starts_with("arm.neon.vqsubu.")) { - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat, - F->arg_begin()->getType()); - return true; - } - if (Name.starts_with("aarch64.neon.addp")) { - if (F->arg_size() != 2) - break; // Invalid IR. - VectorType *Ty = dyn_cast<VectorType>(F->getReturnType()); - if (Ty && Ty->getElementType()->isFloatingPointTy()) { - NewFn = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::aarch64_neon_faddp, Ty); + bool IsArm = Name.consume_front("arm."); + if (IsArm || Name.consume_front("aarch64.")) { + if (UpgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn)) return true; - } - } - - // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8 - // respectively - if ((Name.starts_with("arm.neon.bfdot.") || - Name.starts_with("aarch64.neon.bfdot.")) && - Name.ends_with("i8")) { - Intrinsic::ID IID = - StringSwitch<Intrinsic::ID>(Name) - .Cases("arm.neon.bfdot.v2f32.v8i8", - "arm.neon.bfdot.v4f32.v16i8", - Intrinsic::arm_neon_bfdot) - .Cases("aarch64.neon.bfdot.v2f32.v8i8", - "aarch64.neon.bfdot.v4f32.v16i8", - Intrinsic::aarch64_neon_bfdot) - .Default(Intrinsic::not_intrinsic); - if (IID == Intrinsic::not_intrinsic) - break; - - size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits(); - assert((OperandWidth == 64 || OperandWidth == 128) && - "Unexpected operand width"); - LLVMContext &Ctx = F->getParent()->getContext(); - std::array<Type *, 2> Tys {{ - F->getReturnType(), - FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16) - }}; - NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys); - return true; - } - - // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore - // and accept v8bf16 instead of v16i8 - if ((Name.starts_with("arm.neon.bfm") || - Name.starts_with("aarch64.neon.bfm")) && - Name.ends_with(".v4f32.v16i8")) { - Intrinsic::ID IID = - StringSwitch<Intrinsic::ID>(Name) - .Case("arm.neon.bfmmla.v4f32.v16i8", - Intrinsic::arm_neon_bfmmla) - .Case("arm.neon.bfmlalb.v4f32.v16i8", - Intrinsic::arm_neon_bfmlalb) - .Case("arm.neon.bfmlalt.v4f32.v16i8", - Intrinsic::arm_neon_bfmlalt) - .Case("aarch64.neon.bfmmla.v4f32.v16i8", - Intrinsic::aarch64_neon_bfmmla) - .Case("aarch64.neon.bfmlalb.v4f32.v16i8", - Intrinsic::aarch64_neon_bfmlalb) - .Case("aarch64.neon.bfmlalt.v4f32.v16i8", - Intrinsic::aarch64_neon_bfmlalt) - .Default(Intrinsic::not_intrinsic); - if (IID == Intrinsic::not_intrinsic) - break; - - std::array<Type *, 0> Tys; - NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys); - 
return true; - } - - if (Name == "arm.mve.vctp64" && - cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) { - // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the - // function and deal with it below in UpgradeIntrinsicCall. - rename(F); - return true; + break; } - // These too are changed to accept a v2i1 insteead of the old v4i1. - if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" || - Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" || - Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" || - Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" || - Name == - "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" || - Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" || - Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" || - Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" || - Name == - "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" || - Name == "arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" || - Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" || - Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" || - Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" || - Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" || - Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" || - Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1") - return true; if (Name.consume_front("amdgcn.")) { if (Name == "alignbit") { diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index e28f043cf9e0..a2f5714c7068 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -936,9 +936,8 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy, // Add in the offset, as calculated by the structure layout info... Result += Layout->getElementOffset(FieldNo); } else { - // Get the array index and the size of each array element. - if (int64_t arrayIdx = cast<ConstantInt>(Idx)->getSExtValue()) - Result += arrayIdx * getTypeAllocSize(GTI.getIndexedType()); + if (int64_t ArrayIdx = cast<ConstantInt>(Idx)->getSExtValue()) + Result += ArrayIdx * GTI.getSequentialElementStride(*this); } } diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index cd982c7da102..16a89534b4b3 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -87,7 +87,7 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const { /// If the index isn't known, we take 1 because it is the index that will /// give the worse alignment of the offset. const uint64_t ElemCount = OpC ? 
OpC->getZExtValue() : 1; - Offset = DL.getTypeAllocSize(GTI.getIndexedType()) * ElemCount; + Offset = GTI.getSequentialElementStride(DL) * ElemCount; } Result = Align(MinAlign(Offset, Result.value())); } @@ -157,7 +157,7 @@ bool GEPOperator::accumulateConstantOffset( continue; } if (!AccumulateOffset(ConstOffset->getValue(), - DL.getTypeAllocSize(GTI.getIndexedType()))) + GTI.getSequentialElementStride(DL))) return false; continue; } @@ -170,8 +170,7 @@ bool GEPOperator::accumulateConstantOffset( if (!ExternalAnalysis(*V, AnalysisIndex)) return false; UsedExternalAnalysis = true; - if (!AccumulateOffset(AnalysisIndex, - DL.getTypeAllocSize(GTI.getIndexedType()))) + if (!AccumulateOffset(AnalysisIndex, GTI.getSequentialElementStride(DL))) return false; } return true; @@ -218,14 +217,13 @@ bool GEPOperator::collectOffset( continue; } CollectConstantOffset(ConstOffset->getValue(), - DL.getTypeAllocSize(GTI.getIndexedType())); + GTI.getSequentialElementStride(DL)); continue; } if (STy || ScalableType) return false; - APInt IndexedSize = - APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType())); + APInt IndexedSize = APInt(BitWidth, GTI.getSequentialElementStride(DL)); // Insert an initial offset of 0 for V iff none exists already, then // increment the offset by IndexedSize. if (!IndexedSize.isZero()) { diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index b6e25c46b514..94b0ae7435c9 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -1015,7 +1015,7 @@ getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) { // Otherwise, we have a sequential type like an array or fixed-length // vector. Multiply the index by the ElementSize. - TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType()); + TypeSize Size = GTI.getSequentialElementStride(DL); if (Size.isScalable()) return std::nullopt; Offset += Size.getFixedValue() * OpC->getSExtValue(); diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 05836fd28f52..6a1e53b96998 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -592,7 +592,9 @@ LTO::LTO(Config Conf, ThinBackend Backend, unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode) : Conf(std::move(Conf)), RegularLTO(ParallelCodeGenParallelismLevel, this->Conf), - ThinLTO(std::move(Backend)), LTOMode(LTOMode) {} + ThinLTO(std::move(Backend)), + GlobalResolutions(std::make_optional<StringMap<GlobalResolution>>()), + LTOMode(LTOMode) {} // Requires a destructor for MapVector<BitcodeModule>. LTO::~LTO() = default; @@ -610,7 +612,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, assert(ResI != ResE); SymbolResolution Res = *ResI++; - auto &GlobalRes = GlobalResolutions[Sym.getName()]; + auto &GlobalRes = (*GlobalResolutions)[Sym.getName()]; GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); if (Res.Prevailing) { assert(!GlobalRes.Prevailing && @@ -1125,7 +1127,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { // Compute "dead" symbols, we don't want to import/export these! DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions; - for (auto &Res : GlobalResolutions) { + for (auto &Res : *GlobalResolutions) { // Normally resolution have IR name of symbol. We can do nothing here // otherwise. See comments in GlobalResolution struct for more details. 
if (Res.second.IRName.empty()) @@ -1169,6 +1171,8 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) { Error Result = runRegularLTO(AddStream); if (!Result) + // This will reset the GlobalResolutions optional once done with it to + // reduce peak memory before importing. Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols); if (StatsFile) @@ -1273,8 +1277,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { // This returns true when the name is local or not defined. Locals are // expected to be handled separately. auto IsVisibleToRegularObj = [&](StringRef name) { - auto It = GlobalResolutions.find(name); - return (It == GlobalResolutions.end() || It->second.VisibleOutsideSummary); + auto It = GlobalResolutions->find(name); + return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary); }; // If allowed, upgrade public vcall visibility metadata to linkage unit @@ -1291,7 +1295,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); if (!Conf.CodeGenOnly) { - for (const auto &R : GlobalResolutions) { + for (const auto &R : *GlobalResolutions) { GlobalValue *GV = RegularLTO.CombinedModule->getNamedValue(R.second.IRName); if (!R.second.isPrevailingIRSymbol()) @@ -1708,8 +1712,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, // This returns true when the name is local or not defined. Locals are // expected to be handled separately. auto IsVisibleToRegularObj = [&](StringRef name) { - auto It = GlobalResolutions.find(name); - return (It == GlobalResolutions.end() || + auto It = GlobalResolutions->find(name); + return (It == GlobalResolutions->end() || It->second.VisibleOutsideSummary); }; @@ -1739,15 +1743,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing); } - if (Conf.OptLevel > 0) - ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - isPrevailing, ImportLists, ExportLists); - // Figure out which symbols need to be internalized. This also needs to happen // at -O0 because summary-based DCE is implemented using internalization, and // we must apply DCE consistently with the full LTO module in order to avoid // undefined references during the final link. - for (auto &Res : GlobalResolutions) { + for (auto &Res : *GlobalResolutions) { // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. if (Res.second.Partition != GlobalResolution::External || @@ -1760,6 +1760,16 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, ExportedGUIDs.insert(GUID); } + // Reset the GlobalResolutions to deallocate the associated memory, as there + // are no further accesses. We specifically want to do this before computing + // cross module importing, which adds to peak memory via the computed import + // and export lists. + GlobalResolutions.reset(); + + if (Conf.OptLevel > 0) + ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, + isPrevailing, ImportLists, ExportLists); + // Any functions referenced by the jump table in the regular LTO object must // be exported. 
for (auto &Def : ThinLTO.CombinedIndex.cfiFunctionDefs()) diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index def13044dfcc..ad30b5ce9e63 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -1026,7 +1026,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { ? LF.getValue().evaluateKnownAbsolute(Value, Layout) : LF.getValue().evaluateAsAbsolute(Value, Layout); if (!Abs) { - if (!getBackend().relaxLEB128(LF, Layout, Value)) { + bool Relaxed, UseZeroPad; + std::tie(Relaxed, UseZeroPad) = getBackend().relaxLEB128(LF, Layout, Value); + if (!Relaxed) { getContext().reportError(LF.getValue().getLoc(), Twine(LF.isSigned() ? ".s" : ".u") + "leb128 expression is not absolute"); @@ -1034,6 +1036,8 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { } uint8_t Tmp[10]; // maximum size: ceil(64/7) PadTo = std::max(PadTo, encodeULEB128(uint64_t(Value), Tmp)); + if (UseZeroPad) + Value = 0; } Data.clear(); raw_svector_ostream OSE(Data); diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index a85182aa06ad..80def6dfc24b 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -704,8 +704,14 @@ static void AttemptToFoldSymbolOffsetDifference( } int64_t Num; + unsigned Count; if (DF) { Displacement += DF->getContents().size(); + } else if (auto *AF = dyn_cast<MCAlignFragment>(FI); + AF && Layout && AF->hasEmitNops() && + !Asm->getBackend().shouldInsertExtraNopBytesForCodeAlign( + *AF, Count)) { + Displacement += Asm->computeFragmentSize(*Layout, *AF); } else if (auto *FF = dyn_cast<MCFillFragment>(FI); FF && FF->getNumValues().evaluateAsAbsolute(Num)) { Displacement += Num * FF->getValueSize(); diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 300639f2bfa0..f24395b02043 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -774,7 +774,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, } FunctionEntries.emplace_back(Address, std::move(BBEntries)); - if (FeatEnable.FuncEntryCount || FeatEnable.BBFreq || FeatEnable.BrProb) { + if (PGOAnalyses || FeatEnable.anyEnabled()) { // Function entry count uint64_t FuncEntryCount = FeatEnable.FuncEntryCount @@ -782,8 +782,9 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF, : 0; std::vector<PGOAnalysisMap::PGOBBEntry> PGOBBEntries; - for (uint32_t BlockIndex = 0; !MetadataDecodeErr && !ULEBSizeErr && Cur && - (BlockIndex < NumBlocks); + for (uint32_t BlockIndex = 0; + (FeatEnable.BBFreq || FeatEnable.BrProb) && !MetadataDecodeErr && + !ULEBSizeErr && Cur && (BlockIndex < NumBlocks); ++BlockIndex) { // Block frequency uint64_t BBF = FeatEnable.BBFreq diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 95c4f9f8545d..ae21b81c10c8 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -315,7 +315,7 @@ Expected<SubtargetFeatures> ELFObjectFileBase::getRISCVFeatures() const { else llvm_unreachable("XLEN should be 32 or 64."); - Features.addFeaturesVector(ISAInfo->toFeatureVector()); + Features.addFeaturesVector(ISAInfo->toFeatures()); } return Features; diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp index ca921836b7f6..c05eb0a0468e 100644 --- a/llvm/lib/Object/ObjectFile.cpp +++ b/llvm/lib/Object/ObjectFile.cpp @@ -111,6 +111,10 @@ Triple ObjectFile::makeTriple() const { auto Arch = getArch(); TheTriple.setArch(Triple::ArchType(Arch)); + auto OS = getOS(); + if (OS != Triple::UnknownOS) + TheTriple.setOS(OS); + // For ARM targets, try to 
use the build attributes to build determine // the build target. Target features are also added, but later during // disassembly. @@ -129,10 +133,13 @@ Triple ObjectFile::makeTriple() const { // XCOFF implies AIX. TheTriple.setOS(Triple::AIX); TheTriple.setObjectFormat(Triple::XCOFF); - } - else if (isGOFF()) { + } else if (isGOFF()) { TheTriple.setOS(Triple::ZOS); TheTriple.setObjectFormat(Triple::GOFF); + } else if (TheTriple.isAMDGPU()) { + TheTriple.setVendor(Triple::AMD); + } else if (TheTriple.isNVPTX()) { + TheTriple.setVendor(Triple::NVIDIA); } return TheTriple; diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index ccc29d0cb73d..94cd96968ff2 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -1351,6 +1351,7 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) { break; case wasm::WASM_EXTERNAL_TABLE: Info.Kind = wasm::WASM_SYMBOL_TYPE_TABLE; + Info.ElementIndex = Ex.Index; break; default: return make_error<GenericBinaryError>("unexpected export kind", diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f94bd422c6b5..27bfe12127cc 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -72,7 +72,9 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CallBrPrepare.h" +#include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/DwarfEHPrepare.h" #include "llvm/CodeGen/ExpandLargeDivRem.h" #include "llvm/CodeGen/ExpandLargeFpConvert.h" @@ -86,7 +88,9 @@ #include "llvm/CodeGen/LowerEmuTLS.h" #include "llvm/CodeGen/SafeStack.h" #include "llvm/CodeGen/SelectOptimize.h" +#include "llvm/CodeGen/ShadowStackGCLowering.h" #include "llvm/CodeGen/SjLjEHPrepare.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TypePromotion.h" #include "llvm/CodeGen/WasmEHPrepare.h" #include "llvm/CodeGen/WinEHPrepare.h" @@ -452,9 +456,10 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, std::optional<PGOOptions> PGOOpt, PassInstrumentationCallbacks *PIC) : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) { + bool ShouldPopulateClassToPassNames = PIC && shouldPopulateClassToPassNames(); if (TM) - TM->registerPassBuilderCallbacks(*this); - if (PIC && shouldPopulateClassToPassNames()) { + TM->registerPassBuilderCallbacks(*this, ShouldPopulateClassToPassNames); + if (ShouldPopulateClassToPassNames) { #define MODULE_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 82ce040c6496..bda36bd8c107 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -128,6 +128,7 @@ MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass()) MODULE_PASS("scc-oz-module-inliner", buildInlinerPipeline(OptimizationLevel::Oz, ThinOrFullLTOPhase::None)) +MODULE_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass()) MODULE_PASS("strip", StripSymbolsPass()) MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass()) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) @@ -229,6 +230,7 @@ CGSCC_PASS_WITH_PARAMS( FUNCTION_ANALYSIS("aa", AAManager()) FUNCTION_ANALYSIS("access-info", LoopAccessAnalysis()) FUNCTION_ANALYSIS("assumptions", 
AssumptionAnalysis()) +FUNCTION_ANALYSIS("bb-sections-profile-reader", BasicBlockSectionsProfileReaderAnalysis(TM)) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) @@ -254,6 +256,7 @@ FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis()) FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses()) +FUNCTION_ANALYSIS("ssp-layout", SSPLayoutAnalysis()) FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis()) FUNCTION_ANALYSIS("targetir", TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()) @@ -290,6 +293,7 @@ FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callbrprepare", CallBrPreparePass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) +FUNCTION_PASS("codegenprepare", CodeGenPreparePass(TM)) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass()) FUNCTION_PASS("coro-elide", CoroElidePass()) @@ -313,6 +317,7 @@ FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM)) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flattencfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) +FUNCTION_PASS("gc-lowering", GCLoweringPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) FUNCTION_PASS("gvn-hoist", GVNHoistPass()) FUNCTION_PASS("gvn-sink", GVNSinkPass()) @@ -410,6 +415,7 @@ FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM)) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) +FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index fd1317e3eb25..d467fe5c9a8e 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -118,10 +118,10 @@ static cl::opt<bool> PrintPassNumbers( "print-pass-numbers", cl::init(false), cl::Hidden, cl::desc("Print pass names and their ordinals")); -static cl::opt<unsigned> - PrintAtPassNumber("print-at-pass-number", cl::init(0), cl::Hidden, - cl::desc("Print IR at pass with this number as " - "reported by print-passes-names")); +static cl::opt<unsigned> PrintBeforePassNumber( + "print-before-pass-number", cl::init(0), cl::Hidden, + cl::desc("Print IR before the pass with this number as " + "reported by print-pass-numbers")); static cl::opt<std::string> IRDumpDirectory( "ir-dump-directory", @@ -806,8 +806,7 @@ void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) { // Note: here we rely on a fact that we do not change modules while // traversing the pipeline, so the latest captured module is good // for all print operations that has not happen yet. 
- if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() || - shouldPrintAfterPass(PassID)) + if (shouldPrintAfterPass(PassID)) pushPassRunDescriptor(PassID, IR, DumpIRFilename); if (!shouldPrintIR(IR)) @@ -823,8 +822,10 @@ void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) { return; auto WriteIRToStream = [&](raw_ostream &Stream) { - Stream << "; *** IR Dump Before " << PassID << " on " << getIRName(IR) - << " ***\n"; + Stream << "; *** IR Dump Before "; + if (shouldPrintBeforePassNumber()) + Stream << CurrentPassNumber << "-"; + Stream << PassID << " on " << getIRName(IR) << " ***\n"; unwrapAndPrint(Stream, IR); }; @@ -842,8 +843,7 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) { if (isIgnored(PassID)) return; - if (!shouldPrintAfterPass(PassID) && !shouldPrintPassNumbers() && - !shouldPrintAtPassNumber()) + if (!shouldPrintAfterPass(PassID)) return; auto [M, DumpIRFilename, IRName, StoredPassID] = popPassRunDescriptor(PassID); @@ -853,10 +853,7 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) { return; auto WriteIRToStream = [&](raw_ostream &Stream, const StringRef IRName) { - Stream << "; *** IR Dump " - << (shouldPrintAtPassNumber() - ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID)) - : StringRef(formatv("After {0}", PassID))) + Stream << "; *** IR Dump " << StringRef(formatv("After {0}", PassID)) << " on " << IRName << " ***\n"; unwrapAndPrint(Stream, IR); }; @@ -879,8 +876,7 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) { if (isIgnored(PassID)) return; - if (!shouldPrintAfterPass(PassID) && !shouldPrintPassNumbers() && - !shouldPrintAtPassNumber()) + if (!shouldPrintAfterPass(PassID)) return; auto [M, DumpIRFilename, IRName, StoredPassID] = popPassRunDescriptor(PassID); @@ -893,12 +889,8 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) { auto WriteIRToStream = [&](raw_ostream &Stream, const Module *M, const StringRef IRName) { SmallString<20> Banner; - if (shouldPrintAtPassNumber()) - Banner = formatv("; *** IR Dump At {0}-{1} on {2} (invalidated) ***", - CurrentPassNumber, PassID, IRName); - else - Banner = formatv("; *** IR Dump After {0} on {1} (invalidated) ***", - PassID, IRName); + Banner = formatv("; *** IR Dump After {0} on {1} (invalidated) ***", PassID, + IRName); Stream << Banner << "\n"; printIR(Stream, M); }; @@ -921,6 +913,10 @@ bool PrintIRInstrumentation::shouldPrintBeforePass(StringRef PassID) { if (shouldPrintBeforeAll()) return true; + if (shouldPrintBeforePassNumber() && + CurrentPassNumber == PrintBeforePassNumber) + return true; + StringRef PassName = PIC->getPassNameForClassName(PassID); return is_contained(printBeforePasses(), PassName); } @@ -929,9 +925,6 @@ bool PrintIRInstrumentation::shouldPrintAfterPass(StringRef PassID) { if (shouldPrintAfterAll()) return true; - if (shouldPrintAtPassNumber() && CurrentPassNumber == PrintAtPassNumber) - return true; - StringRef PassName = PIC->getPassNameForClassName(PassID); return is_contained(printAfterPasses(), PassName); } @@ -940,8 +933,8 @@ bool PrintIRInstrumentation::shouldPrintPassNumbers() { return PrintPassNumbers; } -bool PrintIRInstrumentation::shouldPrintAtPassNumber() { - return PrintAtPassNumber > 0; +bool PrintIRInstrumentation::shouldPrintBeforePassNumber() { + return PrintBeforePassNumber > 0; } void PrintIRInstrumentation::registerCallbacks( @@ -950,13 +943,12 @@ void PrintIRInstrumentation::registerCallbacks( // BeforePass callback is not just for printing, 
it also saves a Module // for later use in AfterPassInvalidated. - if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() || + if (shouldPrintPassNumbers() || shouldPrintBeforePassNumber() || shouldPrintBeforeSomePass() || shouldPrintAfterSomePass()) PIC.registerBeforeNonSkippedPassCallback( [this](StringRef P, Any IR) { this->printBeforePass(P, IR); }); - if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() || - shouldPrintAfterSomePass()) { + if (shouldPrintAfterSomePass()) { PIC.registerAfterPassCallback( [this](StringRef P, Any IR, const PreservedAnalyses &) { this->printAfterPass(P, IR); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 134a400e639c..4264da8ad751 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -14,7 +14,6 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -27,7 +26,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -297,29 +295,20 @@ static StringRef getStrippedSourceFileName(const GlobalObject &GO) { return FileName; } -// The PGO name has the format [<filepath>;]<linkage-name> where <filepath>; is -// provided if linkage is local and <linkage-name> is the mangled function -// name. The filepath is used to discriminate possibly identical function names. -// ; is used because it is unlikely to be found in either <filepath> or -// <linkage-name>. +// The PGO name has the format [<filepath>;]<mangled-name> where <filepath>; is +// provided if linkage is local and is used to discriminate possibly identical +// mangled names. ";" is used because it is unlikely to be found in either +// <filepath> or <mangled-name>. // // Older compilers used getPGOFuncName() which has the format -// [<filepath>:]<function-name>. <filepath> is used to discriminate between -// possibly identical function names when linkage is local and <function-name> -// simply comes from F.getName(). This caused trouble for Objective-C functions -// which commonly have :'s in their names. Also, since <function-name> is not -// mangled, they cannot be passed to Mach-O linkers via -order_file. We still -// need to compute this name to lookup functions from profiles built by older -// compilers. +// [<filepath>:]<mangled-name>. This caused trouble for Objective-C functions +// which commonly have :'s in their names. We still need to compute this name to +// lookup functions from profiles built by older compilers. static std::string getIRPGONameForGlobalObject(const GlobalObject &GO, GlobalValue::LinkageTypes Linkage, StringRef FileName) { - SmallString<64> Name; - // FIXME: Mangler's handling is kept outside of `getGlobalIdentifier` for now. - // For more details please check issue #74565. 
- Mangler().getNameWithPrefix(Name, &GO, /*CannotUsePrivateLabel=*/true); - return GlobalValue::getGlobalIdentifier(Name, Linkage, FileName); + return GlobalValue::getGlobalIdentifier(GO.getName(), Linkage, FileName); } static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) { diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 368dead44914..7360d733d96e 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -2474,8 +2474,7 @@ protected: for (OptionCategory *Category : SortedCategories) { // Hide empty categories for --help, but show for --help-hidden. const auto &CategoryOptions = CategorizedOptions[Category]; - bool IsEmptyCategory = CategoryOptions.empty(); - if (!ShowHidden && IsEmptyCategory) + if (CategoryOptions.empty()) continue; // Print category information. @@ -2488,12 +2487,6 @@ protected: else outs() << "\n"; - // When using --help-hidden explicitly state if the category has no - // options associated with it. - if (IsEmptyCategory) { - outs() << " This option category has no options.\n"; - continue; - } // Loop over the options in the category and print. for (const Option *Opt : CategoryOptions) Opt->printOptionInfo(MaxArgLen); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index a9b7e209915a..70f531e40b90 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -466,35 +466,38 @@ bool RISCVISAInfo::compareExtension(const std::string &LHS, return LHS < RHS; } -void RISCVISAInfo::toFeatures( - std::vector<StringRef> &Features, - llvm::function_ref<StringRef(const Twine &)> StrAlloc, - bool AddAllExtensions) const { - for (auto const &Ext : Exts) { - StringRef ExtName = Ext.first; - +std::vector<std::string> RISCVISAInfo::toFeatures(bool AddAllExtensions, + bool IgnoreUnknown) const { + std::vector<std::string> Features; + for (const auto &[ExtName, _] : Exts) { + // i is a base instruction set, not an extension (see + // https://github.com/riscv/riscv-isa-manual/blob/main/src/naming.adoc#base-integer-isa) + // and is not recognized in clang -cc1 if (ExtName == "i") continue; + if (IgnoreUnknown && !isSupportedExtension(ExtName)) + continue; if (isExperimentalExtension(ExtName)) { - Features.push_back(StrAlloc("+experimental-" + ExtName)); + Features.push_back((llvm::Twine("+experimental-") + ExtName).str()); } else { - Features.push_back(StrAlloc("+" + ExtName)); + Features.push_back((llvm::Twine("+") + ExtName).str()); } } if (AddAllExtensions) { for (const RISCVSupportedExtension &Ext : SupportedExtensions) { if (Exts.count(Ext.Name)) continue; - Features.push_back(StrAlloc(Twine("-") + Ext.Name)); + Features.push_back((llvm::Twine("-") + Ext.Name).str()); } for (const RISCVSupportedExtension &Ext : SupportedExperimentalExtensions) { if (Exts.count(Ext.Name)) continue; - Features.push_back(StrAlloc(Twine("-experimental-") + Ext.Name)); + Features.push_back((llvm::Twine("-experimental-") + Ext.Name).str()); } } + return Features; } // Extensions may have a version number, and may be separated by @@ -1269,22 +1272,6 @@ std::string RISCVISAInfo::toString() const { return Arch.str(); } -std::vector<std::string> RISCVISAInfo::toFeatureVector() const { - std::vector<std::string> FeatureVector; - for (auto const &Ext : Exts) { - std::string ExtName = Ext.first; - if (ExtName == "i") // i is not recognized in clang -cc1 - continue; - if (!isSupportedExtension(ExtName)) - continue; - std::string Feature = 
isExperimentalExtension(ExtName) - ? "+experimental-" + ExtName - : "+" + ExtName; - FeatureVector.push_back(Feature); - } - return FeatureVector; -} - llvm::Expected<std::unique_ptr<RISCVISAInfo>> RISCVISAInfo::postProcessAndChecking(std::unique_ptr<RISCVISAInfo> &&ISAInfo) { ISAInfo->updateImplication(); diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index 901769c54b6e..d20ef63a72e8 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -88,6 +88,7 @@ void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&); void initializeAArch64ExpandPseudoPass(PassRegistry &); void initializeAArch64GlobalsTaggingPass(PassRegistry &); void initializeAArch64LoadStoreOptPass(PassRegistry&); +void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &); void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &); void initializeAArch64MIPeepholeOptPass(PassRegistry &); void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &); diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 68f452039c9b..d5e8ed101d1c 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -1405,7 +1405,7 @@ def ProcessorFeatures { FeatureSSBS]; list<SubtargetFeature> A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureFlagM, FeatureFP16FML, FeaturePAuth, + FeatureFlagM, FeaturePAuth, FeaturePerfMon, FeatureRCPC, FeatureSPE, FeatureSSBS]; list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 9b8162ce8dd4..e98f6c4984a7 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -645,7 +645,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = GTI.getSequentialElementStride(DL); while (true) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. @@ -1231,15 +1231,6 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, // Only extend the RHS within the instruction if there is a valid extend type. if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && isValueAvailable(RHS)) { - if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) - if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) - if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { - Register RHSReg = getRegForValue(SI->getOperand(0)); - if (!RHSReg) - return 0; - return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, - C->getZExtValue(), SetFlags, WantResult); - } Register RHSReg = getRegForValue(RHS); if (!RHSReg) return 0; @@ -4987,15 +4978,13 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { if (Field) TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); } else { - Type *Ty = GTI.getIndexedType(); - // If this is a constant subscript, handle it quickly. 
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; // N = N + Offset - TotalOffs += - DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); + TotalOffs += GTI.getSequentialElementStride(DL) * + cast<ConstantInt>(CI)->getSExtValue(); continue; } if (TotalOffs) { @@ -5006,7 +4995,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { } // N = N + Idx * ElementSize; - uint64_t ElementSize = DL.getTypeAllocSize(Ty); + uint64_t ElementSize = GTI.getSequentialElementStride(DL); unsigned IdxN = getRegForGEPIndex(Idx); if (!IdxN) return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 476d99c2a7e0..edc8cc7d4d1e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -580,7 +580,7 @@ bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, if (!isa<ConstantSDNode>(N.getNode())) return false; - uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); + uint64_t Immed = N.getNode()->getAsZExtVal(); unsigned ShiftAmt; if (Immed >> 12 == 0) { @@ -611,7 +611,7 @@ bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, return false; // The immediate operand must be a 24-bit zero-extended immediate. - uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); + uint64_t Immed = N.getNode()->getAsZExtVal(); // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" // have the opposite effect on the C flag, so this pattern mustn't match under @@ -1326,7 +1326,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, // MOV X0, WideImmediate // LDR X2, [BaseReg, X0] if (isa<ConstantSDNode>(RHS)) { - int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); + int64_t ImmOff = (int64_t)RHS->getAsZExtVal(); // Skip the immediate can be selected by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also // checked by using -ImmOff). diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 102fd0c3dae2..47e665176e8b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3588,8 +3588,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, // cmp w13, w12 // can be turned into: // cmp w12, w11, lsl #1 - if (!isa<ConstantSDNode>(RHS) || - !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) { + if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsZExtVal())) { SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS; if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) { @@ -3623,7 +3622,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && LHS.getNode()->hasNUsesOfValue(1, 0)) { - int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue(); + int16_t ValueofRHS = RHS->getAsZExtVal(); if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, @@ -5619,7 +5618,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, // SVE supports an index scaled by sizeof(MemVT.elt) only, everything else // must be calculated before hand. 
- uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue(); + uint64_t ScaleVal = Scale->getAsZExtVal(); if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) { assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types"); EVT IndexVT = Index.getValueType(); @@ -5707,7 +5706,7 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, // SVE supports an index scaled by sizeof(MemVT.elt) only, everything else // must be calculated before hand. - uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue(); + uint64_t ScaleVal = Scale->getAsZExtVal(); if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) { assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types"); EVT IndexVT = Index.getValueType(); @@ -16516,9 +16515,9 @@ static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) { if (Ext0.getOperand(0).getValueType().getVectorNumElements() != VT.getVectorNumElements() * 2) return SDValue(); - if ((Ext0.getConstantOperandVal(1) != 0 && + if ((Ext0.getConstantOperandVal(1) != 0 || Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) && - (Ext1.getConstantOperandVal(1) != 0 && + (Ext1.getConstantOperandVal(1) != 0 || Ext0.getConstantOperandVal(1) != VT.getVectorNumElements())) return SDValue(); unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP @@ -22011,7 +22010,7 @@ static SDValue performBRCONDCombine(SDNode *N, SDValue Cmp = N->getOperand(3); assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!"); - unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue(); + unsigned CC = CCVal->getAsZExtVal(); if (CC != AArch64CC::EQ && CC != AArch64CC::NE) return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index cb63d8726744..10ad5b1f8f25 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -12586,6 +12586,7 @@ def : TokenAlias<".4S", ".4s">; def : TokenAlias<".2D", ".2d">; def : TokenAlias<".1Q", ".1q">; def : TokenAlias<".2H", ".2h">; +def : TokenAlias<".2B", ".2b">; def : TokenAlias<".B", ".b">; def : TokenAlias<".H", ".h">; def : TokenAlias<".S", ".s">; diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp new file mode 100644 index 000000000000..6fcd9c290e9c --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp @@ -0,0 +1,828 @@ +//===- AArch64LoopIdiomTransform.cpp - Loop idiom recognition -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements a pass that recognizes certain loop idioms and +// transforms them into more optimized versions of the same loop. In cases +// where this happens, it can be a significant performance win. +// +// We currently only recognize one loop that finds the first mismatched byte +// in an array and returns the index, i.e. something like: +// +// while (++i != n) { +// if (a[i] != b[i]) +// break; +// } +// +// In this example we can actually vectorize the loop despite the early exit, +// although the loop vectorizer does not support it. It requires some extra +// checks to deal with the possibility of faulting loads when crossing page +// boundaries. 
However, even with these checks it is still profitable to do the +// transformation. +// +//===----------------------------------------------------------------------===// +// +// TODO List: +// +// * Add support for the inverse case where we scan for a matching element. +// * Permit 64-bit induction variable types. +// * Recognize loops that increment the IV *after* comparing bytes. +// * Allow 32-bit sign-extends of the IV used by the GEP. +// +//===----------------------------------------------------------------------===// + +#include "AArch64LoopIdiomTransform.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "aarch64-loop-idiom-transform" + +static cl::opt<bool> + DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true), + cl::desc("Disable AArch64 Loop Idiom Transform Pass.")); + +static cl::opt<bool> DisableByteCmp( + "disable-aarch64-lit-bytecmp", cl::Hidden, cl::init(false), + cl::desc("Proceed with AArch64 Loop Idiom Transform Pass, but do " + "not convert byte-compare loop(s).")); + +static cl::opt<bool> VerifyLoops( + "aarch64-lit-verify", cl::Hidden, cl::init(false), + cl::desc("Verify loops generated AArch64 Loop Idiom Transform Pass.")); + +namespace llvm { + +void initializeAArch64LoopIdiomTransformLegacyPassPass(PassRegistry &); +Pass *createAArch64LoopIdiomTransformPass(); + +} // end namespace llvm + +namespace { + +class AArch64LoopIdiomTransform { + Loop *CurLoop = nullptr; + DominatorTree *DT; + LoopInfo *LI; + const TargetTransformInfo *TTI; + const DataLayout *DL; + +public: + explicit AArch64LoopIdiomTransform(DominatorTree *DT, LoopInfo *LI, + const TargetTransformInfo *TTI, + const DataLayout *DL) + : DT(DT), LI(LI), TTI(TTI), DL(DL) {} + + bool run(Loop *L); + +private: + /// \name Countable Loop Idiom Handling + /// @{ + + bool runOnCountableLoop(); + bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl<BasicBlock *> &ExitBlocks); + + bool recognizeByteCompare(); + Value *expandFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU, + GetElementPtrInst *GEPA, GetElementPtrInst *GEPB, + Instruction *Index, Value *Start, Value *MaxLen); + void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB, + PHINode *IndPhi, Value *MaxLen, Instruction *Index, + Value *Start, bool IncIdx, BasicBlock *FoundBB, + BasicBlock *EndBB); + /// @} +}; + +class AArch64LoopIdiomTransformLegacyPass : public LoopPass { +public: + static char ID; + + explicit AArch64LoopIdiomTransformLegacyPass() : LoopPass(ID) { + initializeAArch64LoopIdiomTransformLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Transform AArch64-specific loop idioms"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; +}; + +bool AArch64LoopIdiomTransformLegacyPass::runOnLoop(Loop *L, + LPPassManager &LPM) { + + if (skipLoop(L)) + return false; + + auto *DT = 
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI( + *L->getHeader()->getParent()); + return AArch64LoopIdiomTransform( + DT, LI, &TTI, &L->getHeader()->getModule()->getDataLayout()) + .run(L); +} + +} // end anonymous namespace + +char AArch64LoopIdiomTransformLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN( + AArch64LoopIdiomTransformLegacyPass, "aarch64-lit", + "Transform specific loop idioms into optimized vector forms", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END( + AArch64LoopIdiomTransformLegacyPass, "aarch64-lit", + "Transform specific loop idioms into optimized vector forms", false, false) + +Pass *llvm::createAArch64LoopIdiomTransformPass() { + return new AArch64LoopIdiomTransformLegacyPass(); +} + +PreservedAnalyses +AArch64LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + if (DisableAll) + return PreservedAnalyses::all(); + + const auto *DL = &L.getHeader()->getModule()->getDataLayout(); + + AArch64LoopIdiomTransform LIT(&AR.DT, &AR.LI, &AR.TTI, DL); + if (!LIT.run(&L)) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +//===----------------------------------------------------------------------===// +// +// Implementation of AArch64LoopIdiomTransform +// +//===----------------------------------------------------------------------===// + +bool AArch64LoopIdiomTransform::run(Loop *L) { + CurLoop = L; + + if (DisableAll || L->getHeader()->getParent()->hasOptSize()) + return false; + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (!L->getLoopPreheader()) + return false; + + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" + << CurLoop->getHeader()->getParent()->getName() + << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); + + return recognizeByteCompare(); +} + +bool AArch64LoopIdiomTransform::recognizeByteCompare() { + // Currently the transformation only works on scalable vector types, although + // there is no fundamental reason why it cannot be made to work for fixed + // width too. + + // We also need to know the minimum page size for the target in order to + // generate runtime memory checks to ensure the vector version won't fault. + if (!TTI->supportsScalableVectors() || !TTI->getMinPageSize().has_value() || + DisableByteCmp) + return false; + + BasicBlock *Header = CurLoop->getHeader(); + + // In AArch64LoopIdiomTransform::run we have already checked that the loop + // has a preheader so we can assume it's in a canonical form. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 2) + return false; + + PHINode *PN = dyn_cast<PHINode>(&Header->front()); + if (!PN || PN->getNumIncomingValues() != 2) + return false; + + auto LoopBlocks = CurLoop->getBlocks(); + // The first block in the loop should contain only 4 instructions, e.g. 
+ // + // while.cond: + // %res.phi = phi i32 [ %start, %ph ], [ %inc, %while.body ] + // %inc = add i32 %res.phi, 1 + // %cmp.not = icmp eq i32 %inc, %n + // br i1 %cmp.not, label %while.end, label %while.body + // + auto CondBBInsts = LoopBlocks[0]->instructionsWithoutDebug(); + if (std::distance(CondBBInsts.begin(), CondBBInsts.end()) > 4) + return false; + + // The second block should contain 7 instructions, e.g. + // + // while.body: + // %idx = zext i32 %inc to i64 + // %idx.a = getelementptr inbounds i8, ptr %a, i64 %idx + // %load.a = load i8, ptr %idx.a + // %idx.b = getelementptr inbounds i8, ptr %b, i64 %idx + // %load.b = load i8, ptr %idx.b + // %cmp.not.ld = icmp eq i8 %load.a, %load.b + // br i1 %cmp.not.ld, label %while.cond, label %while.end + // + auto LoopBBInsts = LoopBlocks[1]->instructionsWithoutDebug(); + if (std::distance(LoopBBInsts.begin(), LoopBBInsts.end()) > 7) + return false; + + // The incoming value to the PHI node from the loop should be an add of 1. + Value *StartIdx = nullptr; + Instruction *Index = nullptr; + if (!CurLoop->contains(PN->getIncomingBlock(0))) { + StartIdx = PN->getIncomingValue(0); + Index = dyn_cast<Instruction>(PN->getIncomingValue(1)); + } else { + StartIdx = PN->getIncomingValue(1); + Index = dyn_cast<Instruction>(PN->getIncomingValue(0)); + } + + // Limit to 32-bit types for now + if (!Index || !Index->getType()->isIntegerTy(32) || + !match(Index, m_c_Add(m_Specific(PN), m_One()))) + return false; + + // If we match the pattern, PN and Index will be replaced with the result of + // the cttz.elts intrinsic. If any other instructions are used outside of + // the loop, we cannot replace it. + for (BasicBlock *BB : LoopBlocks) + for (Instruction &I : *BB) + if (&I != PN && &I != Index) + for (User *U : I.users()) + if (!CurLoop->contains(cast<Instruction>(U))) + return false; + + // Match the branch instruction for the header + ICmpInst::Predicate Pred; + Value *MaxLen; + BasicBlock *EndBB, *WhileBB; + if (!match(Header->getTerminator(), + m_Br(m_ICmp(Pred, m_Specific(Index), m_Value(MaxLen)), + m_BasicBlock(EndBB), m_BasicBlock(WhileBB))) || + Pred != ICmpInst::Predicate::ICMP_EQ || !CurLoop->contains(WhileBB)) + return false; + + // WhileBB should contain the pattern of load & compare instructions. Match + // the pattern and find the GEP instructions used by the loads. 
+ ICmpInst::Predicate WhilePred; + BasicBlock *FoundBB; + BasicBlock *TrueBB; + Value *LoadA, *LoadB; + if (!match(WhileBB->getTerminator(), + m_Br(m_ICmp(WhilePred, m_Value(LoadA), m_Value(LoadB)), + m_BasicBlock(TrueBB), m_BasicBlock(FoundBB))) || + WhilePred != ICmpInst::Predicate::ICMP_EQ || !CurLoop->contains(TrueBB)) + return false; + + Value *A, *B; + if (!match(LoadA, m_Load(m_Value(A))) || !match(LoadB, m_Load(m_Value(B)))) + return false; + + LoadInst *LoadAI = cast<LoadInst>(LoadA); + LoadInst *LoadBI = cast<LoadInst>(LoadB); + if (!LoadAI->isSimple() || !LoadBI->isSimple()) + return false; + + GetElementPtrInst *GEPA = dyn_cast<GetElementPtrInst>(A); + GetElementPtrInst *GEPB = dyn_cast<GetElementPtrInst>(B); + + if (!GEPA || !GEPB) + return false; + + Value *PtrA = GEPA->getPointerOperand(); + Value *PtrB = GEPB->getPointerOperand(); + + // Check we are loading i8 values from two loop invariant pointers + if (!CurLoop->isLoopInvariant(PtrA) || !CurLoop->isLoopInvariant(PtrB) || + !GEPA->getResultElementType()->isIntegerTy(8) || + !GEPB->getResultElementType()->isIntegerTy(8) || + !LoadAI->getType()->isIntegerTy(8) || + !LoadBI->getType()->isIntegerTy(8) || PtrA == PtrB) + return false; + + // Check that the index to the GEPs is the index we found earlier + if (GEPA->getNumIndices() > 1 || GEPB->getNumIndices() > 1) + return false; + + Value *IdxA = GEPA->getOperand(GEPA->getNumIndices()); + Value *IdxB = GEPB->getOperand(GEPB->getNumIndices()); + if (IdxA != IdxB || !match(IdxA, m_ZExt(m_Specific(Index)))) + return false; + + // We only ever expect the pre-incremented index value to be used inside the + // loop. + if (!PN->hasOneUse()) + return false; + + // Ensure that when the Found and End blocks are identical the PHIs have the + // supported format. We don't currently allow cases like this: + // while.cond: + // ... + // br i1 %cmp.not, label %while.end, label %while.body + // + // while.body: + // ... + // br i1 %cmp.not2, label %while.cond, label %while.end + // + // while.end: + // %final_ptr = phi ptr [ %c, %while.body ], [ %d, %while.cond ] + // + // Where the incoming values for %final_ptr are unique and from each of the + // loop blocks, but not actually defined in the loop. This requires extra + // work setting up the byte.compare block, i.e. by introducing a select to + // choose the correct value. + // TODO: We could add support for this in future. + if (FoundBB == EndBB) { + for (PHINode &EndPN : EndBB->phis()) { + Value *WhileCondVal = EndPN.getIncomingValueForBlock(Header); + Value *WhileBodyVal = EndPN.getIncomingValueForBlock(WhileBB); + + // The value of the index when leaving the while.cond block is always the + // same as the end value (MaxLen) so we permit either. Otherwise for any + // other value defined outside the loop we only allow values that are the + // same as the exit value for while.body. + if (WhileCondVal != Index && WhileCondVal != MaxLen && + WhileCondVal != WhileBodyVal) + return false; + } + } + + LLVM_DEBUG(dbgs() << "FOUND IDIOM IN LOOP: \n" + << *(EndBB->getParent()) << "\n\n"); + + // The index is incremented before the GEP/Load pair so we need to + // add 1 to the start value. 
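Concretely, because the source loop increments before loading, the element at the original start value is never read, so the expansion searches [Start + 1, MaxLen). A one-line restatement, illustrative only:

unsigned expandedStart(unsigned Start, bool IncIdx) {
  // transformByteCompare bumps Start by one when the increment precedes the
  // loads, so the generated search begins at the first index actually read.
  return IncIdx ? Start + 1 : Start;
}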
+ transformByteCompare(GEPA, GEPB, PN, MaxLen, Index, StartIdx, /*IncIdx=*/true, + FoundBB, EndBB); + return true; +} + +Value *AArch64LoopIdiomTransform::expandFindMismatch( + IRBuilder<> &Builder, DomTreeUpdater &DTU, GetElementPtrInst *GEPA, + GetElementPtrInst *GEPB, Instruction *Index, Value *Start, Value *MaxLen) { + Value *PtrA = GEPA->getPointerOperand(); + Value *PtrB = GEPB->getPointerOperand(); + + // Get the arguments and types for the intrinsic. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + BranchInst *PHBranch = cast<BranchInst>(Preheader->getTerminator()); + LLVMContext &Ctx = PHBranch->getContext(); + Type *LoadType = Type::getInt8Ty(Ctx); + Type *ResType = Builder.getInt32Ty(); + + // Split block in the original loop preheader. + BasicBlock *EndBlock = + SplitBlock(Preheader, PHBranch, DT, LI, nullptr, "mismatch_end"); + + // Create the blocks that we're going to need: + // 1. A block for checking the zero-extended length exceeds 0 + // 2. A block to check that the start and end addresses of a given array + // lie on the same page. + // 3. The SVE loop preheader. + // 4. The first SVE loop block. + // 5. The SVE loop increment block. + // 6. A block we can jump to from the SVE loop when a mismatch is found. + // 7. The first block of the scalar loop itself, containing PHIs , loads + // and cmp. + // 8. A scalar loop increment block to increment the PHIs and go back + // around the loop. + + BasicBlock *MinItCheckBlock = BasicBlock::Create( + Ctx, "mismatch_min_it_check", EndBlock->getParent(), EndBlock); + + // Update the terminator added by SplitBlock to branch to the first block + Preheader->getTerminator()->setSuccessor(0, MinItCheckBlock); + + BasicBlock *MemCheckBlock = BasicBlock::Create( + Ctx, "mismatch_mem_check", EndBlock->getParent(), EndBlock); + + BasicBlock *SVELoopPreheaderBlock = BasicBlock::Create( + Ctx, "mismatch_sve_loop_preheader", EndBlock->getParent(), EndBlock); + + BasicBlock *SVELoopStartBlock = BasicBlock::Create( + Ctx, "mismatch_sve_loop", EndBlock->getParent(), EndBlock); + + BasicBlock *SVELoopIncBlock = BasicBlock::Create( + Ctx, "mismatch_sve_loop_inc", EndBlock->getParent(), EndBlock); + + BasicBlock *SVELoopMismatchBlock = BasicBlock::Create( + Ctx, "mismatch_sve_loop_found", EndBlock->getParent(), EndBlock); + + BasicBlock *LoopPreHeaderBlock = BasicBlock::Create( + Ctx, "mismatch_loop_pre", EndBlock->getParent(), EndBlock); + + BasicBlock *LoopStartBlock = + BasicBlock::Create(Ctx, "mismatch_loop", EndBlock->getParent(), EndBlock); + + BasicBlock *LoopIncBlock = BasicBlock::Create( + Ctx, "mismatch_loop_inc", EndBlock->getParent(), EndBlock); + + DTU.applyUpdates({{DominatorTree::Insert, Preheader, MinItCheckBlock}, + {DominatorTree::Delete, Preheader, EndBlock}}); + + // Update LoopInfo with the new SVE & scalar loops. 
+ auto SVELoop = LI->AllocateLoop(); + auto ScalarLoop = LI->AllocateLoop(); + + if (CurLoop->getParentLoop()) { + CurLoop->getParentLoop()->addBasicBlockToLoop(MinItCheckBlock, *LI); + CurLoop->getParentLoop()->addBasicBlockToLoop(MemCheckBlock, *LI); + CurLoop->getParentLoop()->addBasicBlockToLoop(SVELoopPreheaderBlock, *LI); + CurLoop->getParentLoop()->addChildLoop(SVELoop); + CurLoop->getParentLoop()->addBasicBlockToLoop(SVELoopMismatchBlock, *LI); + CurLoop->getParentLoop()->addBasicBlockToLoop(LoopPreHeaderBlock, *LI); + CurLoop->getParentLoop()->addChildLoop(ScalarLoop); + } else { + LI->addTopLevelLoop(SVELoop); + LI->addTopLevelLoop(ScalarLoop); + } + + // Add the new basic blocks to their associated loops. + SVELoop->addBasicBlockToLoop(SVELoopStartBlock, *LI); + SVELoop->addBasicBlockToLoop(SVELoopIncBlock, *LI); + + ScalarLoop->addBasicBlockToLoop(LoopStartBlock, *LI); + ScalarLoop->addBasicBlockToLoop(LoopIncBlock, *LI); + + // Set up some types and constants that we intend to reuse. + Type *I64Type = Builder.getInt64Ty(); + + // Check the zero-extended iteration count > 0 + Builder.SetInsertPoint(MinItCheckBlock); + Value *ExtStart = Builder.CreateZExt(Start, I64Type); + Value *ExtEnd = Builder.CreateZExt(MaxLen, I64Type); + // This check doesn't really cost us very much. + + Value *LimitCheck = Builder.CreateICmpULE(Start, MaxLen); + BranchInst *MinItCheckBr = + BranchInst::Create(MemCheckBlock, LoopPreHeaderBlock, LimitCheck); + MinItCheckBr->setMetadata( + LLVMContext::MD_prof, + MDBuilder(MinItCheckBr->getContext()).createBranchWeights(99, 1)); + Builder.Insert(MinItCheckBr); + + DTU.applyUpdates( + {{DominatorTree::Insert, MinItCheckBlock, MemCheckBlock}, + {DominatorTree::Insert, MinItCheckBlock, LoopPreHeaderBlock}}); + + // For each of the arrays, check the start/end addresses are on the same + // page. + Builder.SetInsertPoint(MemCheckBlock); + + // The early exit in the original loop means that when performing vector + // loads we are potentially reading ahead of the early exit. So we could + // fault if crossing a page boundary. Therefore, we create runtime memory + // checks based on the minimum page size as follows: + // 1. Calculate the addresses of the first memory accesses in the loop, + // i.e. LhsStart and RhsStart. + // 2. Get the last accessed addresses in the loop, i.e. LhsEnd and RhsEnd. + // 3. Determine which pages correspond to all the memory accesses, i.e + // LhsStartPage, LhsEndPage, RhsStartPage, RhsEndPage. + // 4. If LhsStartPage == LhsEndPage and RhsStartPage == RhsEndPage, then + // we know we won't cross any page boundaries in the loop so we can + // enter the vector loop! Otherwise we fall back on the scalar loop. 
+ Value *LhsStartGEP = Builder.CreateGEP(LoadType, PtrA, ExtStart); + Value *RhsStartGEP = Builder.CreateGEP(LoadType, PtrB, ExtStart); + Value *RhsStart = Builder.CreatePtrToInt(RhsStartGEP, I64Type); + Value *LhsStart = Builder.CreatePtrToInt(LhsStartGEP, I64Type); + Value *LhsEndGEP = Builder.CreateGEP(LoadType, PtrA, ExtEnd); + Value *RhsEndGEP = Builder.CreateGEP(LoadType, PtrB, ExtEnd); + Value *LhsEnd = Builder.CreatePtrToInt(LhsEndGEP, I64Type); + Value *RhsEnd = Builder.CreatePtrToInt(RhsEndGEP, I64Type); + + const uint64_t MinPageSize = TTI->getMinPageSize().value(); + const uint64_t AddrShiftAmt = llvm::Log2_64(MinPageSize); + Value *LhsStartPage = Builder.CreateLShr(LhsStart, AddrShiftAmt); + Value *LhsEndPage = Builder.CreateLShr(LhsEnd, AddrShiftAmt); + Value *RhsStartPage = Builder.CreateLShr(RhsStart, AddrShiftAmt); + Value *RhsEndPage = Builder.CreateLShr(RhsEnd, AddrShiftAmt); + Value *LhsPageCmp = Builder.CreateICmpNE(LhsStartPage, LhsEndPage); + Value *RhsPageCmp = Builder.CreateICmpNE(RhsStartPage, RhsEndPage); + + Value *CombinedPageCmp = Builder.CreateOr(LhsPageCmp, RhsPageCmp); + BranchInst *CombinedPageCmpCmpBr = BranchInst::Create( + LoopPreHeaderBlock, SVELoopPreheaderBlock, CombinedPageCmp); + CombinedPageCmpCmpBr->setMetadata( + LLVMContext::MD_prof, MDBuilder(CombinedPageCmpCmpBr->getContext()) + .createBranchWeights(10, 90)); + Builder.Insert(CombinedPageCmpCmpBr); + + DTU.applyUpdates( + {{DominatorTree::Insert, MemCheckBlock, LoopPreHeaderBlock}, + {DominatorTree::Insert, MemCheckBlock, SVELoopPreheaderBlock}}); + + // Set up the SVE loop preheader, i.e. calculate initial loop predicate, + // zero-extend MaxLen to 64-bits, determine the number of vector elements + // processed in each iteration, etc. + Builder.SetInsertPoint(SVELoopPreheaderBlock); + + // At this point we know two things must be true: + // 1. Start <= End + // 2. ExtMaxLen <= MinPageSize due to the page checks. + // Therefore, we know that we can use a 64-bit induction variable that + // starts from 0 -> ExtMaxLen and it will not overflow. + ScalableVectorType *PredVTy = + ScalableVectorType::get(Builder.getInt1Ty(), 16); + + Value *InitialPred = Builder.CreateIntrinsic( + Intrinsic::get_active_lane_mask, {PredVTy, I64Type}, {ExtStart, ExtEnd}); + + Value *VecLen = Builder.CreateIntrinsic(Intrinsic::vscale, {I64Type}, {}); + VecLen = Builder.CreateMul(VecLen, ConstantInt::get(I64Type, 16), "", + /*HasNUW=*/true, /*HasNSW=*/true); + + Value *PFalse = Builder.CreateVectorSplat(PredVTy->getElementCount(), + Builder.getInt1(false)); + + BranchInst *JumpToSVELoop = BranchInst::Create(SVELoopStartBlock); + Builder.Insert(JumpToSVELoop); + + DTU.applyUpdates( + {{DominatorTree::Insert, SVELoopPreheaderBlock, SVELoopStartBlock}}); + + // Set up the first SVE loop block by creating the PHIs, doing the vector + // loads and comparing the vectors. 
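Before that, one note on the predicate computed in the preheader above: @llvm.get.active.lane.mask(Base, N) sets lane I exactly when Base + I < N (unsigned compare). A scalar model of that, with Lanes standing in for vscale * 16 used by the nxv16i8 loads (overflow of Base + I is ignored in this sketch):

#include <cstdint>
#include <vector>

std::vector<bool> activeLaneMask(uint64_t Base, uint64_t N, unsigned Lanes) {
  std::vector<bool> Mask(Lanes);
  for (unsigned I = 0; I != Lanes; ++I)
    Mask[I] = Base + I < N; // lane I active iff its element index is < N
  return Mask;
}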
+ Builder.SetInsertPoint(SVELoopStartBlock); + PHINode *LoopPred = Builder.CreatePHI(PredVTy, 2, "mismatch_sve_loop_pred"); + LoopPred->addIncoming(InitialPred, SVELoopPreheaderBlock); + PHINode *SVEIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_sve_index"); + SVEIndexPhi->addIncoming(ExtStart, SVELoopPreheaderBlock); + Type *SVELoadType = ScalableVectorType::get(Builder.getInt8Ty(), 16); + Value *Passthru = ConstantInt::getNullValue(SVELoadType); + + Value *SVELhsGep = Builder.CreateGEP(LoadType, PtrA, SVEIndexPhi); + if (GEPA->isInBounds()) + cast<GetElementPtrInst>(SVELhsGep)->setIsInBounds(true); + Value *SVELhsLoad = Builder.CreateMaskedLoad(SVELoadType, SVELhsGep, Align(1), + LoopPred, Passthru); + + Value *SVERhsGep = Builder.CreateGEP(LoadType, PtrB, SVEIndexPhi); + if (GEPB->isInBounds()) + cast<GetElementPtrInst>(SVERhsGep)->setIsInBounds(true); + Value *SVERhsLoad = Builder.CreateMaskedLoad(SVELoadType, SVERhsGep, Align(1), + LoopPred, Passthru); + + Value *SVEMatchCmp = Builder.CreateICmpNE(SVELhsLoad, SVERhsLoad); + SVEMatchCmp = Builder.CreateSelect(LoopPred, SVEMatchCmp, PFalse); + Value *SVEMatchHasActiveLanes = Builder.CreateOrReduce(SVEMatchCmp); + BranchInst *SVEEarlyExit = BranchInst::Create( + SVELoopMismatchBlock, SVELoopIncBlock, SVEMatchHasActiveLanes); + Builder.Insert(SVEEarlyExit); + + DTU.applyUpdates( + {{DominatorTree::Insert, SVELoopStartBlock, SVELoopMismatchBlock}, + {DominatorTree::Insert, SVELoopStartBlock, SVELoopIncBlock}}); + + // Increment the index counter and calculate the predicate for the next + // iteration of the loop. We branch back to the start of the loop if there + // is at least one active lane. + Builder.SetInsertPoint(SVELoopIncBlock); + Value *NewSVEIndexPhi = Builder.CreateAdd(SVEIndexPhi, VecLen, "", + /*HasNUW=*/true, /*HasNSW=*/true); + SVEIndexPhi->addIncoming(NewSVEIndexPhi, SVELoopIncBlock); + Value *NewPred = + Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, + {PredVTy, I64Type}, {NewSVEIndexPhi, ExtEnd}); + LoopPred->addIncoming(NewPred, SVELoopIncBlock); + + Value *PredHasActiveLanes = + Builder.CreateExtractElement(NewPred, uint64_t(0)); + BranchInst *SVELoopBranchBack = + BranchInst::Create(SVELoopStartBlock, EndBlock, PredHasActiveLanes); + Builder.Insert(SVELoopBranchBack); + + DTU.applyUpdates({{DominatorTree::Insert, SVELoopIncBlock, SVELoopStartBlock}, + {DominatorTree::Insert, SVELoopIncBlock, EndBlock}}); + + // If we found a mismatch then we need to calculate which lane in the vector + // had a mismatch and add that on to the current loop index. 
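That calculation reduces to a count of trailing inactive lanes over (last predicate AND mismatch mask), added to the vector loop's element index; @llvm.experimental.cttz.elts does the lane count. A scalar model, treating bit I of each mask as lane I (purely illustrative):

#include <cstdint>

uint64_t mismatchIndex(uint64_t VectorIndex, uint64_t ActiveLanes,
                       uint64_t MismatchLanes) {
  // ZeroIsPoison is set on the cttz.elts call, so at least one lane is set
  // whenever this block is reached.
  uint64_t FirstSetLane =
      static_cast<uint64_t>(__builtin_ctzll(ActiveLanes & MismatchLanes));
  return VectorIndex + FirstSetLane;
}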
+ Builder.SetInsertPoint(SVELoopMismatchBlock); + PHINode *FoundPred = Builder.CreatePHI(PredVTy, 1, "mismatch_sve_found_pred"); + FoundPred->addIncoming(SVEMatchCmp, SVELoopStartBlock); + PHINode *LastLoopPred = + Builder.CreatePHI(PredVTy, 1, "mismatch_sve_last_loop_pred"); + LastLoopPred->addIncoming(LoopPred, SVELoopStartBlock); + PHINode *SVEFoundIndex = + Builder.CreatePHI(I64Type, 1, "mismatch_sve_found_index"); + SVEFoundIndex->addIncoming(SVEIndexPhi, SVELoopStartBlock); + + Value *PredMatchCmp = Builder.CreateAnd(LastLoopPred, FoundPred); + Value *Ctz = Builder.CreateIntrinsic( + Intrinsic::experimental_cttz_elts, {ResType, PredMatchCmp->getType()}, + {PredMatchCmp, /*ZeroIsPoison=*/Builder.getInt1(true)}); + Ctz = Builder.CreateZExt(Ctz, I64Type); + Value *SVELoopRes64 = Builder.CreateAdd(SVEFoundIndex, Ctz, "", + /*HasNUW=*/true, /*HasNSW=*/true); + Value *SVELoopRes = Builder.CreateTrunc(SVELoopRes64, ResType); + + Builder.Insert(BranchInst::Create(EndBlock)); + + DTU.applyUpdates({{DominatorTree::Insert, SVELoopMismatchBlock, EndBlock}}); + + // Generate code for scalar loop. + Builder.SetInsertPoint(LoopPreHeaderBlock); + Builder.Insert(BranchInst::Create(LoopStartBlock)); + + DTU.applyUpdates( + {{DominatorTree::Insert, LoopPreHeaderBlock, LoopStartBlock}}); + + Builder.SetInsertPoint(LoopStartBlock); + PHINode *IndexPhi = Builder.CreatePHI(ResType, 2, "mismatch_index"); + IndexPhi->addIncoming(Start, LoopPreHeaderBlock); + + // Otherwise compare the values + // Load bytes from each array and compare them. + Value *GepOffset = Builder.CreateZExt(IndexPhi, I64Type); + + Value *LhsGep = Builder.CreateGEP(LoadType, PtrA, GepOffset); + if (GEPA->isInBounds()) + cast<GetElementPtrInst>(LhsGep)->setIsInBounds(true); + Value *LhsLoad = Builder.CreateLoad(LoadType, LhsGep); + + Value *RhsGep = Builder.CreateGEP(LoadType, PtrB, GepOffset); + if (GEPB->isInBounds()) + cast<GetElementPtrInst>(RhsGep)->setIsInBounds(true); + Value *RhsLoad = Builder.CreateLoad(LoadType, RhsGep); + + Value *MatchCmp = Builder.CreateICmpEQ(LhsLoad, RhsLoad); + // If we have a mismatch then exit the loop ... + BranchInst *MatchCmpBr = BranchInst::Create(LoopIncBlock, EndBlock, MatchCmp); + Builder.Insert(MatchCmpBr); + + DTU.applyUpdates({{DominatorTree::Insert, LoopStartBlock, LoopIncBlock}, + {DominatorTree::Insert, LoopStartBlock, EndBlock}}); + + // Have we reached the maximum permitted length for the loop? + Builder.SetInsertPoint(LoopIncBlock); + Value *PhiInc = Builder.CreateAdd(IndexPhi, ConstantInt::get(ResType, 1), "", + /*HasNUW=*/Index->hasNoUnsignedWrap(), + /*HasNSW=*/Index->hasNoSignedWrap()); + IndexPhi->addIncoming(PhiInc, LoopIncBlock); + Value *IVCmp = Builder.CreateICmpEQ(PhiInc, MaxLen); + BranchInst *IVCmpBr = BranchInst::Create(EndBlock, LoopStartBlock, IVCmp); + Builder.Insert(IVCmpBr); + + DTU.applyUpdates({{DominatorTree::Insert, LoopIncBlock, EndBlock}, + {DominatorTree::Insert, LoopIncBlock, LoopStartBlock}}); + + // In the end block we need to insert a PHI node to deal with three cases: + // 1. We didn't find a mismatch in the scalar loop, so we return MaxLen. + // 2. We exitted the scalar loop early due to a mismatch and need to return + // the index that we found. + // 3. We didn't find a mismatch in the SVE loop, so we return MaxLen. + // 4. We exitted the SVE loop early due to a mismatch and need to return + // the index that we found. 
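Collapsed into plain control flow, the merge selects one of two values depending on which of the four predecessors branched to mismatch_end. A sketch (enumerator names are made up; they mirror the blocks created above):

#include <cstdint>

enum class ExitFrom { ScalarDone, ScalarMismatch, VectorDone, VectorMismatch };

// Value the mismatch_result PHI carries for each incoming edge.
uint32_t mismatchResult(ExitFrom E, uint32_t MaxLen, uint32_t ScalarIndex,
                        uint32_t VectorIndex) {
  switch (E) {
  case ExitFrom::ScalarDone:
  case ExitFrom::VectorDone:
    return MaxLen;      // ran to the end, no mismatch found
  case ExitFrom::ScalarMismatch:
    return ScalarIndex; // index the scalar loop stopped at
  case ExitFrom::VectorMismatch:
    return VectorIndex; // index computed from cttz.elts in the SVE path
  }
  return MaxLen;        // unreachable; keeps -Wreturn-type quiet
}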
+ Builder.SetInsertPoint(EndBlock, EndBlock->getFirstInsertionPt()); + PHINode *ResPhi = Builder.CreatePHI(ResType, 4, "mismatch_result"); + ResPhi->addIncoming(MaxLen, LoopIncBlock); + ResPhi->addIncoming(IndexPhi, LoopStartBlock); + ResPhi->addIncoming(MaxLen, SVELoopIncBlock); + ResPhi->addIncoming(SVELoopRes, SVELoopMismatchBlock); + + Value *FinalRes = Builder.CreateTrunc(ResPhi, ResType); + + if (VerifyLoops) { + ScalarLoop->verifyLoop(); + SVELoop->verifyLoop(); + if (!SVELoop->isRecursivelyLCSSAForm(*DT, *LI)) + report_fatal_error("Loops must remain in LCSSA form!"); + if (!ScalarLoop->isRecursivelyLCSSAForm(*DT, *LI)) + report_fatal_error("Loops must remain in LCSSA form!"); + } + + return FinalRes; +} + +void AArch64LoopIdiomTransform::transformByteCompare( + GetElementPtrInst *GEPA, GetElementPtrInst *GEPB, PHINode *IndPhi, + Value *MaxLen, Instruction *Index, Value *Start, bool IncIdx, + BasicBlock *FoundBB, BasicBlock *EndBB) { + + // Insert the byte compare code at the end of the preheader block + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + BasicBlock *Header = CurLoop->getHeader(); + BranchInst *PHBranch = cast<BranchInst>(Preheader->getTerminator()); + IRBuilder<> Builder(PHBranch); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); + Builder.SetCurrentDebugLocation(PHBranch->getDebugLoc()); + + // Increment the pointer if this was done before the loads in the loop. + if (IncIdx) + Start = Builder.CreateAdd(Start, ConstantInt::get(Start->getType(), 1)); + + Value *ByteCmpRes = + expandFindMismatch(Builder, DTU, GEPA, GEPB, Index, Start, MaxLen); + + // Replaces uses of index & induction Phi with intrinsic (we already + // checked that the the first instruction of Header is the Phi above). + assert(IndPhi->hasOneUse() && "Index phi node has more than one use!"); + Index->replaceAllUsesWith(ByteCmpRes); + + assert(PHBranch->isUnconditional() && + "Expected preheader to terminate with an unconditional branch."); + + // If no mismatch was found, we can jump to the end block. Create a + // new basic block for the compare instruction. + auto *CmpBB = BasicBlock::Create(Preheader->getContext(), "byte.compare", + Preheader->getParent()); + CmpBB->moveBefore(EndBB); + + // Replace the branch in the preheader with an always-true conditional branch. + // This ensures there is still a reference to the original loop. + Builder.CreateCondBr(Builder.getTrue(), CmpBB, Header); + PHBranch->eraseFromParent(); + + BasicBlock *MismatchEnd = cast<Instruction>(ByteCmpRes)->getParent(); + DTU.applyUpdates({{DominatorTree::Insert, MismatchEnd, CmpBB}}); + + // Create the branch to either the end or found block depending on the value + // returned by the intrinsic. + Builder.SetInsertPoint(CmpBB); + if (FoundBB != EndBB) { + Value *FoundCmp = Builder.CreateICmpEQ(ByteCmpRes, MaxLen); + Builder.CreateCondBr(FoundCmp, EndBB, FoundBB); + DTU.applyUpdates({{DominatorTree::Insert, CmpBB, FoundBB}, + {DominatorTree::Insert, CmpBB, EndBB}}); + + } else { + Builder.CreateBr(FoundBB); + DTU.applyUpdates({{DominatorTree::Insert, CmpBB, FoundBB}}); + } + + auto fixSuccessorPhis = [&](BasicBlock *SuccBB) { + for (PHINode &PN : SuccBB->phis()) { + // At this point we've already replaced all uses of the result from the + // loop with ByteCmp. Look through the incoming values to find ByteCmp, + // meaning this is a Phi collecting the results of the byte compare. 
+ bool ResPhi = false; + for (Value *Op : PN.incoming_values()) + if (Op == ByteCmpRes) { + ResPhi = true; + break; + } + + // Any PHI that depended upon the result of the byte compare needs a new + // incoming value from CmpBB. This is because the original loop will get + // deleted. + if (ResPhi) + PN.addIncoming(ByteCmpRes, CmpBB); + else { + // There should be no other outside uses of other values in the + // original loop. Any incoming values should either: + // 1. Be for blocks outside the loop, which aren't interesting. Or .. + // 2. These are from blocks in the loop with values defined outside + // the loop. We should a similar incoming value from CmpBB. + for (BasicBlock *BB : PN.blocks()) + if (CurLoop->contains(BB)) { + PN.addIncoming(PN.getIncomingValueForBlock(BB), CmpBB); + break; + } + } + } + }; + + // Ensure all Phis in the successors of CmpBB have an incoming value from it. + fixSuccessorPhis(EndBB); + if (EndBB != FoundBB) + fixSuccessorPhis(FoundBB); + + // The new CmpBB block isn't part of the loop, but will need to be added to + // the outer loop if there is one. + if (!CurLoop->isOutermost()) + CurLoop->getParentLoop()->addBasicBlockToLoop(CmpBB, *LI); + + if (VerifyLoops && CurLoop->getParentLoop()) { + CurLoop->getParentLoop()->verifyLoop(); + if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI)) + report_fatal_error("Loops must remain in LCSSA form!"); + } +} diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h new file mode 100644 index 000000000000..cc68425bb68b --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.h @@ -0,0 +1,25 @@ +//===- AArch64LoopIdiomTransform.h --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H +#define LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +struct AArch64LoopIdiomTransformPass + : PassInfoMixin<AArch64LoopIdiomTransformPass> { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AARCH64_AARCH64LOOPIDIOMTRANSFORM_H diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 738a52eebad2..380f6e1fcfda 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -810,7 +810,7 @@ defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; } -let Predicates = [HasSME2p1, HasB16B16] in { +let Predicates = [HasSME2, HasB16B16] in { defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>; defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 344a15389063..ee10a7d1c706 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -453,6 +453,9 @@ def AArch64msb_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3), def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3), [(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3), (xor node:$op1, (xor node:$op2, node:$op3))]>; +def AArch64bcax : PatFrags<(ops node:$op1, node:$op2, node:$op3), + [(int_aarch64_sve_bcax node:$op1, node:$op2, node:$op3), + (xor node:$op1, (and node:$op2, (vnot node:$op3)))]>; def AArch64fmla_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm), [(int_aarch64_sve_fmla node:$pg, node:$za, node:$zn, node:$zm), @@ -3714,7 +3717,7 @@ let Predicates = [HasSVE2orSME] in { // SVE2 bitwise ternary operations defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", AArch64eor3>; - defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", int_aarch64_sve_bcax>; + defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", AArch64bcax>; defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl, AArch64bsp>; defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>; defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>; diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 1a76f354589e..9e43f206efcf 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -172,7 +172,7 @@ static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl, SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr, SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const { - uint64_t 
ObjSize = cast<ConstantSDNode>(Size)->getZExtValue(); + uint64_t ObjSize = Size->getAsZExtVal(); assert(ObjSize % 16 == 0); MachineFunction &MF = DAG.getMachineFunction(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 036719be06d8..144610e021c5 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -11,6 +11,7 @@ #include "AArch64TargetMachine.h" #include "AArch64.h" +#include "AArch64LoopIdiomTransform.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64MachineScheduler.h" #include "AArch64MacroFusion.h" @@ -43,6 +44,7 @@ #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -222,6 +224,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { initializeAArch64DeadRegisterDefinitionsPass(*PR); initializeAArch64ExpandPseudoPass(*PR); initializeAArch64LoadStoreOptPass(*PR); + initializeAArch64LoopIdiomTransformLegacyPassPass(*PR); initializeAArch64MIPeepholeOptPass(*PR); initializeAArch64SIMDInstrOptPass(*PR); initializeAArch64O0PreLegalizerCombinerPass(*PR); @@ -537,6 +540,14 @@ public: } // end anonymous namespace +void AArch64TargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { + PB.registerLateLoopOptimizationsEPCallback( + [=](LoopPassManager &LPM, OptimizationLevel Level) { + LPM.addPass(AArch64LoopIdiomTransformPass()); + }); +} + TargetTransformInfo AArch64TargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(AArch64TTIImpl(this, F)); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h index 12b971853f84..8fb68b06f137 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETMACHINE_H #include "AArch64InstrInfo.h" +#include "AArch64LoopIdiomTransform.h" #include "AArch64Subtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -43,6 +44,9 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; TargetLoweringObjectFile* getObjFileLowering() const override { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 0b220069a388..f471294ffc25 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -420,6 +420,8 @@ public: return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); } + + std::optional<unsigned> getMinPageSize() const { return 4096; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 1d0e8be80d07..b657a0954d78 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -282,6 +282,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) // Regardless of FP16 
support, widen 16-bit elements to 32-bits. .minScalar(0, s32) .libcallFor({s32, s64}); + getActionDefinitionsBuilder(G_FPOWI) + .scalarize(0) + .minScalar(0, s32) + .libcallFor({{s32, s32}, {s64, s32}}); getActionDefinitionsBuilder(G_INSERT) .legalIf(all(typeInSet(0, {s32, s64, p0}), @@ -362,7 +366,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}}) // These extends are also legal - .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}}) + .legalForTypesWithMemDesc( + {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}}) .widenScalarToNextPow2(0, /* MinSize = */ 8) .lowerIfMemSizeNotByteSizePow2() .clampScalar(0, s8, s64) @@ -761,17 +766,35 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .lowerIf( all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0))); + LegalityPredicate UseOutlineAtomics = [&ST](const LegalityQuery &Query) { + return ST.outlineAtomics() && !ST.hasLSE(); + }; + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) - .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0))) - .customIf([](const LegalityQuery &Query) { - return Query.Types[0].getSizeInBits() == 128; + .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0), + predNot(UseOutlineAtomics))) + .customIf(all(typeIs(0, s128), predNot(UseOutlineAtomics))) + .customIf([UseOutlineAtomics](const LegalityQuery &Query) { + return Query.Types[0].getSizeInBits() == 128 && + !UseOutlineAtomics(Query); }) + .libcallIf(all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, p0), + UseOutlineAtomics)) + .clampScalar(0, s32, s64); + + getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, + G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR, + G_ATOMICRMW_XOR}) + .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0), + predNot(UseOutlineAtomics))) + .libcallIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0), + UseOutlineAtomics)) .clampScalar(0, s32, s64); + // Do not outline these atomics operations, as per comment in + // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR(). getActionDefinitionsBuilder( - {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, - G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, - G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) + {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0))) .clampScalar(0, s32, s64); @@ -989,6 +1012,23 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampMaxNumElements(1, s16, 8) .lower(); + // For fmul reductions we need to split up into individual operations. We + // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of + // smaller types, followed by scalarizing what remains. 
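For orientation, the two reduction opcodes legalized below differ only in evaluation order, which is why one can be clamped and split while the other is simply scalarized. Their reference semantics, sketched on plain floats (not the actual lowering):

#include <cstddef>

// G_VECREDUCE_FMUL: unordered product of the lanes; free to re-associate,
// so splitting the vector and multiplying partial products is legal.
float reduceFMul(const float *V, size_t N) {
  float R = 1.0f;
  for (size_t I = 0; I != N; ++I)
    R *= V[I];
  return R;
}

// G_VECREDUCE_SEQ_FMUL: strictly ordered, folding each lane into a running
// accumulator that starts from an explicit scalar operand.
float reduceSeqFMul(float Start, const float *V, size_t N) {
  float R = Start;
  for (size_t I = 0; I != N; ++I)
    R *= V[I];
  return R;
}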
+ getActionDefinitionsBuilder(G_VECREDUCE_FMUL) + .minScalarOrElt(0, MinFPScalar) + .clampMaxNumElements(1, s64, 2) + .clampMaxNumElements(1, s32, 4) + .clampMaxNumElements(1, s16, 8) + .clampMaxNumElements(1, s32, 2) + .clampMaxNumElements(1, s16, 4) + .scalarize(1) + .lower(); + + getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL}) + .scalarize(2) + .lower(); + getActionDefinitionsBuilder(G_VECREDUCE_ADD) .legalFor({{s8, v16s8}, {s8, v8s8}, @@ -1137,8 +1177,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) verify(*ST.getInstrInfo()); } -bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AArch64LegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); GISelChangeObserver &Observer = Helper.Observer; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 19f77baa77f8..c62a9d847c52 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -23,12 +23,12 @@ namespace llvm { class AArch64Subtarget; -/// This class provides the information for the target register banks. class AArch64LegalizerInfo : public LegalizerInfo { public: AArch64LegalizerInfo(const AArch64Subtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index b7552541e950..789ec817d3d8 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10082,6 +10082,12 @@ multiclass sve2p1_vector_to_pred<string mnemonic, SDPatternOperator Op_lane, SDP def : InstAlias<mnemonic # "\t$Pd, $Zn", (!cast<Instruction>(NAME # _B) PPR8:$Pd, ZPRAny:$Zn, 0), 1>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _H) PPR16:$Pd, ZPRAny:$Zn, 0), 0>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _S) PPR32:$Pd, ZPRAny:$Zn, 0), 0>; + def : InstAlias<mnemonic # "\t$Pd, $Zn", + (!cast<Instruction>(NAME # _D) PPR64:$Pd, ZPRAny:$Zn, 0), 0>; // any_lane def : Pat<(nxv16i1 (Op_lane (nxv16i8 ZPRAny:$Zn), (i32 timm32_0_0:$Idx))), @@ -10143,6 +10149,12 @@ multiclass sve2p1_pred_to_vector<string mnemonic, SDPatternOperator MergeOp, def : InstAlias<mnemonic # "\t$Zd, $Pn", (!cast<Instruction>(NAME # _B) ZPRAny:$Zd, 0, PPR8:$Pn), 1>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _H) ZPRAny:$Zd, 0, PPR16:$Pn), 0>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _S) ZPRAny:$Zd, 0, PPR32:$Pn), 0>; + def : InstAlias<mnemonic # "\t$Zd, $Pn", + (!cast<Instruction>(NAME # _D) ZPRAny:$Zd, 0, PPR64:$Pn), 0>; // Merge def : Pat<(nxv8i16 (MergeOp (nxv8i16 ZPRAny:$Zd), (nxv8i1 PPR16:$Pn), (i32 timm32_1_1:$Idx))), diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index d2a325d5ad89..df8c35ffd457 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -761,6 +761,12 @@ def FeatureShaderCyclesRegister : SubtargetFeature<"shader-cycles-register", 
"Has SHADER_CYCLES hardware register" >; +def FeatureShaderCyclesHiLoRegisters : SubtargetFeature<"shader-cycles-hi-lo-registers", + "HasShaderCyclesHiLoRegisters", + "true", + "Has SHADER_CYCLES_HI/LO hardware registers" +>; + def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts", "HasMadMacF32Insts", "true", @@ -1469,7 +1475,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureNSAEncoding, FeaturePartialNSAEncoding, FeatureWavefrontSize32, - FeatureShaderCyclesRegister, + FeatureShaderCyclesHiLoRegisters, FeatureArchitectedFlatScratch, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, @@ -1970,6 +1976,8 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">, def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">, AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>; +def HasShaderCyclesHiLoRegisters : Predicate<"Subtarget->hasShaderCyclesHiLoRegisters()">; + def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">, AssemblerPredicate<(all_of FeatureFP8Insts)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 9036b26a6f6b..c5207228dc91 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -22,28 +22,28 @@ def CC_SI_Gfx : CallingConv<[ // 32 is reserved for the stack pointer // 33 is reserved for the frame pointer // 34 is reserved for the base pointer - CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[ + CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[ SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29 ]>>>, - CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[ + CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 ]>>>, - CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>> + CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>> ]>; def RetCC_SI_Gfx : CallingConv<[ CCIfType<[i1], CCPromoteToType<i32>>, CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>, - CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[ + CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, @@ -66,7 +66,7 @@ def RetCC_SI_Gfx : CallingConv<[ def CC_SI_SHADER : CallingConv<[ - CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[ + CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, @@ -76,7 +76,7 @@ def CC_SI_SHADER : CallingConv<[ ]>>>, // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. 
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[ + CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, @@ -109,7 +109,7 @@ def RetCC_SI_Shader : CallingConv<[ ]>>, // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. - CCIfType<[f32, f16, v2f16] , CCAssignToReg<[ + CCIfType<[f32, f16, v2f16, bf16, v2bf16] , CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, @@ -188,23 +188,23 @@ def CC_AMDGPU_Func : CallingConv<[ CCIfType<[i1], CCPromoteToType<i32>>, CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>, - CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg< + CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg< !foreach(i, !range(0, 30), !cast<Register>("SGPR"#i)) // SGPR0-29 >>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[ + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16], CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, - CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>> + CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>> ]>; // Calling convention for leaf functions def RetCC_AMDGPU_Func : CallingConv<[ CCIfType<[i1], CCPromoteToType<i32>>, CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[ + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16], CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, @@ -223,11 +223,11 @@ def CC_AMDGPU : CallingConv<[ ]>; def CC_AMDGPU_CS_CHAIN : CallingConv<[ - CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg< + CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg< !foreach(i, !range(105), !cast<Register>("SGPR"#i)) >>>, - CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg< + CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg< !foreach(i, !range(8, 255), !cast<Register>("VGPR"#i)) >>> ]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 8d4cad4c07bc..0c77fe725958 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -104,6 +104,13 @@ def foldable_fneg : GICombineRule< [{ return Helper.matchFoldableFneg(*${ffn}, ${matchinfo}); }]), (apply [{ Helper.applyFoldableFneg(*${ffn}, ${matchinfo}); }])>; +// Detects s_mul_u64 instructions whose higher bits are zero/sign extended. 
+def smulu64 : GICombineRule< + (defs root:$smul, unsigned_matchinfo:$matchinfo), + (match (wip_match_opcode G_MUL):$smul, + [{ return matchCombine_s_mul_u64(*${smul}, ${matchinfo}); }]), + (apply [{ applyCombine_s_mul_u64(*${smul}, ${matchinfo}); }])>; + def sign_exension_in_reg_matchdata : GIDefMatchData<"MachineInstr *">; def sign_extension_in_reg : GICombineRule< @@ -149,7 +156,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner< "AMDGPUPostLegalizerCombinerImpl", [all_combines, gfx6gfx7_combines, gfx8_combines, uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg, - rcp_sqrt_to_rsq, sign_extension_in_reg]> { + rcp_sqrt_to_rsq, sign_extension_in_reg, smulu64]> { let CombineAllMethodName = "tryCombineAllImpl"; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index b51a876750b5..74e9cd7d0965 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -646,7 +646,15 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( Offset += 8; // Skipped. } - Offset += 72; // Reserved. + // Emit argument for hidden dynamic lds size + if (MFI.isDynamicLDSUsed()) { + emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset, + Args); + } else { + Offset += 4; // skipped + } + + Offset += 68; // Reserved. // hidden_private_base and hidden_shared_base are only when the subtarget has // ApertureRegs. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index bffea82ab8f4..719ae2e8750c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -303,6 +303,7 @@ void AMDGPUDAGToDAGISel::PreprocessISelDAG() { switch (N->getOpcode()) { case ISD::BUILD_VECTOR: + // TODO: Match load d16 from shl (extload:i16), 16 MadeChange |= matchLoadD16FromBuildVector(N); break; default: @@ -317,26 +318,16 @@ void AMDGPUDAGToDAGISel::PreprocessISelDAG() { } } -bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N, - bool Negated) const { +bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { if (N->isUndef()) return true; const SIInstrInfo *TII = Subtarget->getInstrInfo(); - if (Negated) { - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) - return TII->isInlineConstant(-C->getAPIntValue()); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) + return TII->isInlineConstant(C->getAPIntValue()); - if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) - return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt()); - - } else { - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) - return TII->isInlineConstant(C->getAPIntValue()); - - if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) - return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); - } + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) + return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); return false; } @@ -382,7 +373,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, Subtarget->getRegisterInfo()->getRegClass(RCID); SDValue SubRegOp = N->getOperand(OpNo + 1); - unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); + unsigned SubRegIdx = SubRegOp->getAsZExtVal(); return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h 
index 374108af08cd..df4a211d42a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -50,15 +50,13 @@ static inline bool getConstantValue(SDValue N, uint32_t &Out) { } // TODO: Handle undef as zero -static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, - bool Negate = false) { +static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2); uint32_t LHSVal, RHSVal; if (getConstantValue(N->getOperand(0), LHSVal) && getConstantValue(N->getOperand(1), RHSVal)) { SDLoc SL(N); - uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16) - : (LHSVal & 0xffff) | (RHSVal << 16); + uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16); return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), DAG.getTargetConstant(K, SL, MVT::i32)); } @@ -66,9 +64,6 @@ static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, return nullptr; } -static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { - return packConstantV2I16(N, DAG, true); -} } // namespace /// AMDGPU specific code to select AMDGPU machine instructions for @@ -110,10 +105,7 @@ protected: private: std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; - bool isInlineImmediate(const SDNode *N, bool Negated = false) const; - bool isNegInlineImmediate(const SDNode *N) const { - return isInlineImmediate(N, true); - } + bool isInlineImmediate(const SDNode *N) const; bool isInlineImmediate16(int64_t Imm) const { return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 8fbc90a6db9f..0dbcaf5a1b13 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -387,17 +387,20 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32}, Custom); + + // FIXME: Why is v8f16/v8bf16 missing? 
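The custom EXTRACT_SUBVECTOR list that follows now also covers the v2/v4/v16/v32 bf16 vector types alongside the f16 ones; the operation itself just takes a contiguous slice of the source vector starting at a constant element index. A generic, container-based sketch of that semantics (purely illustrative, not the DAG node):

#include <cstddef>
#include <vector>

template <typename T>
std::vector<T> extractSubvector(const std::vector<T> &V, size_t Idx,
                                size_t NumElts) {
  // NumElts contiguous elements of V beginning at element Idx.
  return std::vector<T>(V.begin() + Idx, V.begin() + Idx + NumElts);
}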
setOperationAction( ISD::EXTRACT_SUBVECTOR, - {MVT::v2f16, MVT::v2i16, MVT::v4f16, MVT::v4i16, MVT::v2f32, - MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32, MVT::v4i32, - MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32, MVT::v7f32, - MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v9f32, MVT::v9i32, - MVT::v10i32, MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, - MVT::v12f32, MVT::v16f16, MVT::v16i16, MVT::v16f32, MVT::v16i32, - MVT::v32f32, MVT::v32i32, MVT::v2f64, MVT::v2i64, MVT::v3f64, - MVT::v3i64, MVT::v4f64, MVT::v4i64, MVT::v8f64, MVT::v8i64, - MVT::v16f64, MVT::v16i64, MVT::v32i16, MVT::v32f16}, + {MVT::v2f16, MVT::v2bf16, MVT::v2i16, MVT::v4f16, MVT::v4bf16, + MVT::v4i16, MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32, + MVT::v4f32, MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32, + MVT::v6i32, MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32, + MVT::v9f32, MVT::v9i32, MVT::v10i32, MVT::v10f32, MVT::v11i32, + MVT::v11f32, MVT::v12i32, MVT::v12f32, MVT::v16f16, MVT::v16bf16, + MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, + MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, + MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64, + MVT::v32i16, MVT::v32f16, MVT::v32bf16}, Custom); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); @@ -3281,7 +3284,15 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext); } - assert(SrcVT == MVT::i64 && "operation should be legal"); + if (DestVT == MVT::bf16) { + SDLoc SL(Op); + SDValue ToF32 = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f32, Src); + SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true); + return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag); + } + + if (SrcVT != MVT::i64) + return Op; if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { SDLoc DL(Op); @@ -3319,7 +3330,15 @@ SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext); } - assert(SrcVT == MVT::i64 && "operation should be legal"); + if (DestVT == MVT::bf16) { + SDLoc SL(Op); + SDValue ToF32 = DAG.getNode(ISD::SINT_TO_FP, SL, MVT::f32, Src); + SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true); + return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag); + } + + if (SrcVT != MVT::i64) + return Op; // TODO: Factor out code common with LowerUINT_TO_FP. 
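The new bf16 cases above lower {U,S}INT_TO_FP in two steps: convert the integer to f32 first, then emit an FP_ROUND down to bf16. A minimal standalone sketch of that shape, assuming the usual round-to-nearest-even f32-to-bf16 conversion and omitting NaN handling (the helper names below are illustrative, not part of the patch):

#include <cstdint>
#include <cstring>

// f32 -> bf16 with round-to-nearest-even, ignoring NaN payloads.
static uint16_t f32_to_bf16(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  bits += 0x7FFF + ((bits >> 16) & 1); // round to nearest, ties to even
  return static_cast<uint16_t>(bits >> 16);
}

// Mirrors the two-step lowering: UINT_TO_FP to f32, then FP_ROUND to bf16.
static uint16_t uint64_to_bf16(uint64_t x) {
  return f32_to_bf16(static_cast<float>(x));
}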
@@ -3517,7 +3536,7 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) con return DAG.getZExtOrTrunc(V, DL, Op.getValueType()); } -SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op, +SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); unsigned OpOpcode = Op.getOpcode(); @@ -3528,6 +3547,12 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op, if (SrcVT == MVT::f16 && DestVT == MVT::i16) return Op; + if (SrcVT == MVT::bf16) { + SDLoc DL(Op); + SDValue PromotedSrc = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src); + return DAG.getNode(Op.getOpcode(), DL, DestVT, PromotedSrc); + } + // Promote i16 to i32 if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { SDLoc DL(Op); @@ -3536,6 +3561,9 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op, return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToInt32); } + if (DestVT != MVT::i64) + return Op; + if (SrcVT == MVT::f16 || (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) { SDLoc DL(Op); @@ -3546,7 +3574,7 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_INT(SDValue Op, return DAG.getNode(Ext, DL, MVT::i64, FpToInt32); } - if (DestVT == MVT::i64 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) + if (SrcVT == MVT::f32 || SrcVT == MVT::f64) return LowerFP_TO_INT64(Op, DAG, OpOpcode == ISD::FP_TO_SINT); return SDValue(); @@ -4947,7 +4975,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y)) if (DestVT.isVector()) { SDValue Src = N->getOperand(0); - if (Src.getOpcode() == ISD::BUILD_VECTOR) { + if (Src.getOpcode() == ISD::BUILD_VECTOR && + (DCI.getDAGCombineLevel() < AfterLegalizeDAG || + isOperationLegal(ISD::BUILD_VECTOR, DestVT))) { EVT SrcVT = Src.getValueType(); unsigned NElts = DestVT.getVectorNumElements(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 88ef4b577424..ad8dcda93c36 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2764,7 +2764,9 @@ static bool isConstant(const MachineInstr &MI) { void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { - const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg()); + unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 
0 : 1; + const MachineInstr *PtrMI = + MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg()); assert(PtrMI); @@ -2817,6 +2819,10 @@ bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; + if (MI.getOpcode() == AMDGPU::G_PREFETCH) + return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() == + AMDGPU::SGPRRegBankID; + const Instruction *I = dyn_cast<Instruction>(Ptr); return I && I->getMetadata("amdgpu.uniform"); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index fbee28889451..aa235c07e995 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -701,13 +701,23 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .maxScalar(0, S32); } - getActionDefinitionsBuilder(G_MUL) - .legalFor({S32, S16, V2S16}) - .clampMaxNumElementsStrict(0, S16, 2) - .scalarize(0) - .minScalar(0, S16) - .widenScalarToNextMultipleOf(0, 32) - .custom(); + if (ST.hasScalarSMulU64()) { + getActionDefinitionsBuilder(G_MUL) + .legalFor({S64, S32, S16, V2S16}) + .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) + .minScalar(0, S16) + .widenScalarToNextMultipleOf(0, 32) + .custom(); + } else { + getActionDefinitionsBuilder(G_MUL) + .legalFor({S32, S16, V2S16}) + .clampMaxNumElementsStrict(0, S16, 2) + .scalarize(0) + .minScalar(0, S16) + .widenScalarToNextMultipleOf(0, 32) + .custom(); + } assert(ST.hasMad64_32()); getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) @@ -1996,8 +2006,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, verify(*ST.getInstrInfo()); } -bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool AMDGPULegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &B = Helper.MIRBuilder; MachineRegisterInfo &MRI = *B.getMRI(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 855fa0ddc214..56aabd4f6ab7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -27,7 +27,6 @@ class MachineIRBuilder; namespace AMDGPU { struct ImageDimIntrinsicInfo; } -/// This class provides the information for the target register banks. class AMDGPULegalizerInfo final : public LegalizerInfo { const GCNSubtarget &ST; @@ -35,7 +34,8 @@ public: AMDGPULegalizerInfo(const GCNSubtarget &ST, const GCNTargetMachine &TM); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; Register getSegmentAperture(unsigned AddrSpace, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index f03e6b8915b1..1b2f74cf153b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -87,9 +87,6 @@ private: Constant *copr0, Constant *copr1); bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo); - // sqrt - bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); - /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value /// of cos, sincos call). 
std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg, @@ -672,8 +669,6 @@ bool AMDGPULibCalls::fold(CallInst *CI) { // Specialized optimizations for each function call. // - // TODO: Handle other simple intrinsic wrappers. Sqrt. - // // TODO: Handle native functions switch (FInfo.getId()) { case AMDGPULibFunc::EI_EXP: @@ -794,7 +789,9 @@ bool AMDGPULibCalls::fold(CallInst *CI) { case AMDGPULibFunc::EI_ROOTN: return fold_rootn(FPOp, B, FInfo); case AMDGPULibFunc::EI_SQRT: - return fold_sqrt(FPOp, B, FInfo); + // TODO: Allow with strictfp + constrained intrinsic + return tryReplaceLibcallWithSimpleIntrinsic( + B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false); case AMDGPULibFunc::EI_COS: case AMDGPULibFunc::EI_SIN: return fold_sincos(FPOp, B, FInfo); @@ -1273,29 +1270,6 @@ bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic( return true; } -// fold sqrt -> native_sqrt (x) -bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, - const FuncInfo &FInfo) { - if (!isUnsafeMath(FPOp)) - return false; - - if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && - (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { - Module *M = B.GetInsertBlock()->getModule(); - - if (FunctionCallee FPExpr = getNativeFunction( - M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { - Value *opr0 = FPOp->getOperand(0); - LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " - << "sqrt(" << *opr0 << ")\n"); - Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt"); - replaceCall(FPOp, nval); - return true; - } - } - return false; -} - std::tuple<Value *, Value *, Value *> AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B, FunctionCallee Fsincos) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 323462e60a29..31777295b4f8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -19,6 +19,26 @@ using namespace llvm; +static const GlobalVariable * +getKernelDynLDSGlobalFromFunction(const Function &F) { + const Module *M = F.getParent(); + SmallString<64> KernelDynLDSName("llvm.amdgcn."); + KernelDynLDSName += F.getName(); + KernelDynLDSName += ".dynlds"; + return M->getNamedGlobal(KernelDynLDSName); +} + +static bool hasLDSKernelArgument(const Function &F) { + for (const Argument &Arg : F.args()) { + Type *ArgTy = Arg.getType(); + if (auto PtrTy = dyn_cast<PointerType>(ArgTy)) { + if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) + return true; + } + } + return false; +} + AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST) : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), @@ -65,6 +85,10 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); NoSignedZerosFPMath = NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; + + const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F); + if (DynLdsGlobal || hasLDSKernelArgument(F)) + UsesDynamicLDS = true; } unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, @@ -139,15 +163,6 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, return Offset; } -static const GlobalVariable * -getKernelDynLDSGlobalFromFunction(const Function &F) { - const Module *M = F.getParent(); - std::string KernelDynLDSName = "llvm.amdgcn."; - KernelDynLDSName += F.getName(); - KernelDynLDSName 
+= ".dynlds"; - return M->getNamedGlobal(KernelDynLDSName); -} - std::optional<uint32_t> AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { // TODO: Would be more consistent with the abs symbols to use a range @@ -210,3 +225,9 @@ void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, } } } + +void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) { + UsesDynamicLDS = DynLDS; +} + +bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 248ee26a47eb..7efb7f825348 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -46,6 +46,9 @@ protected: /// stages. Align DynLDSAlign; + // Flag to check dynamic LDS usage by kernel. + bool UsesDynamicLDS = false; + // Kernels + shaders. i.e. functions called by the hardware and not called // by other functions. bool IsEntryFunction = false; @@ -119,6 +122,10 @@ public: Align getDynLDSAlign() const { return DynLDSAlign; } void setDynLDSAlign(const Function &F, const GlobalVariable &GV); + + void setUsesDynamicLDS(bool DynLDS); + + bool isDynamicLDSUsed() const; }; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index 7b18e1f805d8..21bfab52c6c4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -104,6 +104,14 @@ public: void applyCombineSignExtendInReg(MachineInstr &MI, MachineInstr *&MatchInfo) const; + // Find the s_mul_u64 instructions where the higher bits are either + // zero-extended or sign-extended. + bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const; + // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher + // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32 + // bits are zero extended. + void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const; + private: #define GET_GICOMBINER_CLASS_MEMBERS #define AMDGPUSubtarget GCNSubtarget @@ -419,6 +427,32 @@ void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg( MI.eraseFromParent(); } +bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64( + MachineInstr &MI, unsigned &NewOpcode) const { + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + if (MRI.getType(Src0) != LLT::scalar(64)) + return false; + + if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 && + KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) { + NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32; + return true; + } + + if (KB->computeNumSignBits(Src1) >= 33 && + KB->computeNumSignBits(Src0) >= 33) { + NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32; + return true; + } + return false; +} + +void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64( + MachineInstr &MI, unsigned &NewOpcode) const { + Helper.replaceOpcodeWith(MI, NewOpcode); +} + // Pass boilerplate // ================ diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index fba060464a6e..391c2b9ec256 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -441,7 +441,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( // FIXME: Returns uniform if there's no source value information. 
This is // probably wrong. -static bool isScalarLoadLegal(const MachineInstr &MI) { +bool AMDGPURegisterBankInfo::isScalarLoadLegal(const MachineInstr &MI) const { if (!MI.hasOneMemOperand()) return false; @@ -2094,6 +2094,74 @@ bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect( return true; } +// Break s_mul_u64 into 32-bit vector operations. +void AMDGPURegisterBankInfo::applyMappingSMULU64( + MachineIRBuilder &B, const OperandsMapper &OpdMapper) const { + SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0)); + SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1)); + SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2)); + + // All inputs are SGPRs, nothing special to do. + if (DefRegs.empty()) { + assert(Src0Regs.empty() && Src1Regs.empty()); + applyDefaultMapping(OpdMapper); + return; + } + + assert(DefRegs.size() == 2); + assert(Src0Regs.size() == Src1Regs.size() && + (Src0Regs.empty() || Src0Regs.size() == 2)); + + MachineRegisterInfo &MRI = OpdMapper.getMRI(); + MachineInstr &MI = OpdMapper.getMI(); + Register DstReg = MI.getOperand(0).getReg(); + LLT HalfTy = LLT::scalar(32); + + // Depending on where the source registers came from, the generic code may + // have decided to split the inputs already or not. If not, we still need to + // extract the values. + + if (Src0Regs.empty()) + split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg()); + else + setRegsToType(MRI, Src0Regs, HalfTy); + + if (Src1Regs.empty()) + split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg()); + else + setRegsToType(MRI, Src1Regs, HalfTy); + + setRegsToType(MRI, DefRegs, HalfTy); + + // The multiplication is done as follows: + // + // Op1H Op1L + // * Op0H Op0L + // -------------------- + // Op1H*Op0L Op1L*Op0L + // + Op1H*Op0H Op1L*Op0H + // ----------------------------------------- + // (Op1H*Op0L + Op1L*Op0H + carry) Op1L*Op0L + // + // We drop Op1H*Op0H because the result of the multiplication is a 64-bit + // value and that would overflow. + // The low 32-bit value is Op1L*Op0L. + // The high 32-bit value is Op1H*Op0L + Op1L*Op0H + carry (from + // Op1L*Op0L). + + ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank); + + Register Hi = B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0); + Register MulLoHi = B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0); + Register Add = B.buildAdd(HalfTy, Hi, MulLoHi).getReg(0); + Register MulHiLo = B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0); + B.buildAdd(DefRegs[1], Add, MulHiLo); + B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]); + + MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank); + MI.eraseFromParent(); +} + void AMDGPURegisterBankInfo::applyMappingImpl( MachineIRBuilder &B, const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -2394,13 +2462,21 @@ void AMDGPURegisterBankInfo::applyMappingImpl( Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); + // Special case for s_mul_u64. There is not a vector equivalent of + // s_mul_u64. Hence, we have to break down s_mul_u64 into 32-bit vector + // multiplications. + if (Opc == AMDGPU::G_MUL && DstTy.getSizeInBits() == 64) { + applyMappingSMULU64(B, OpdMapper); + return; + } + // 16-bit operations are VALU only, but can be promoted to 32-bit SALU. // Packed 16-bit operations need to be scalarized and promoted. 
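The applyMappingSMULU64 helper above implements exactly the long-hand scheme in its comment: the low 32 bits of the product are Op1L*Op0L, and the high 32 bits are umulh(Op1L, Op0L) plus the two cross products, with Op1H*Op0H dropped because it only contributes above bit 63. A self-contained C++ model of that decomposition (the function name is illustrative):

#include <cstdint>

// 64x64 -> 64 multiply rebuilt from 32-bit pieces, matching the
// decomposition used by applyMappingSMULU64.
uint64_t mul64_via_32(uint64_t a, uint64_t b) {
  uint32_t aLo = static_cast<uint32_t>(a), aHi = static_cast<uint32_t>(a >> 32);
  uint32_t bLo = static_cast<uint32_t>(b), bHi = static_cast<uint32_t>(b >> 32);

  uint64_t lolo = static_cast<uint64_t>(aLo) * bLo;   // Op1L*Op0L, full 64 bits
  uint32_t lo = static_cast<uint32_t>(lolo);          // low result word
  uint32_t carry = static_cast<uint32_t>(lolo >> 32); // the buildUMulH term

  // High word: carry plus the cross products; aHi*bHi is dropped because it
  // only contributes above bit 63 of the result.
  uint32_t hi = carry + aLo * bHi + aHi * bLo;
  return (static_cast<uint64_t>(hi) << 32) | lo;
}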
if (DstTy != LLT::scalar(16) && DstTy != LLT::fixed_vector(2, 16)) break; const RegisterBank *DstBank = - OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; + OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; if (DstBank == &AMDGPU::VGPRRegBank) break; @@ -2451,6 +2527,72 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } + case AMDGPU::G_AMDGPU_S_MUL_I64_I32: + case AMDGPU::G_AMDGPU_S_MUL_U64_U32: { + // This is a special case for s_mul_u64. We use + // G_AMDGPU_S_MUL_I64_I32 opcode to represent an s_mul_u64 operation + // where the 33 higher bits are sign-extended and + // G_AMDGPU_S_MUL_U64_U32 opcode to represent an s_mul_u64 operation + // where the 32 higher bits are zero-extended. In case scalar registers are + // selected, both opcodes are lowered as s_mul_u64. If the vector registers + // are selected, then G_AMDGPU_S_MUL_I64_I32 and + // G_AMDGPU_S_MUL_U64_U32 are lowered with a vector mad instruction. + + // Insert basic copies. + applyDefaultMapping(OpdMapper); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg0 = MI.getOperand(1).getReg(); + Register SrcReg1 = MI.getOperand(2).getReg(); + const LLT S32 = LLT::scalar(32); + const LLT S64 = LLT::scalar(64); + assert(MRI.getType(DstReg) == S64 && "This is a special case for s_mul_u64 " + "that handles only 64-bit operands."); + const RegisterBank *DstBank = + OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank; + + // Replace G_AMDGPU_S_MUL_I64_I32 and G_AMDGPU_S_MUL_U64_U32 + // with s_mul_u64 operation. + if (DstBank == &AMDGPU::SGPRRegBank) { + MI.setDesc(TII->get(AMDGPU::S_MUL_U64)); + MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass); + MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass); + MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass); + return; + } + + // Replace G_AMDGPU_S_MUL_I64_I32 and G_AMDGPU_S_MUL_U64_U32 + // with a vector mad. + assert(MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank && + "The destination operand should be in vector registers."); + + DebugLoc DL = MI.getDebugLoc(); + + // Extract the lower subregister from the first operand. + Register Op0L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass); + MRI.setType(Op0L, S32); + B.buildTrunc(Op0L, SrcReg0); + + // Extract the lower subregister from the second operand. + Register Op1L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass); + MRI.setType(Op1L, S32); + B.buildTrunc(Op1L, SrcReg1); + + unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32 + ? 
AMDGPU::G_AMDGPU_MAD_U64_U32 + : AMDGPU::G_AMDGPU_MAD_I64_I32; + + MachineIRBuilder B(MI); + Register Zero64 = B.buildConstant(S64, 0).getReg(0); + MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass); + Register CarryOut = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass); + B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64}); + MI.eraseFromParent(); + return; + } case AMDGPU::G_SEXT_INREG: { SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1)); if (SrcRegs.empty()) @@ -3263,17 +3405,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MI.eraseFromParent(); return; } - unsigned PtrBank = - getRegBankID(MI.getOperand(0).getReg(), MRI, AMDGPU::SGPRRegBankID); + Register PtrReg = MI.getOperand(0).getReg(); + unsigned PtrBank = getRegBankID(PtrReg, MRI, AMDGPU::SGPRRegBankID); if (PtrBank == AMDGPU::VGPRRegBankID) { MI.eraseFromParent(); return; } - // FIXME: There is currently no support for prefetch in global isel. - // There is no node equivalence and what's worse there is no MMO produced - // for a prefetch on global isel path. - // Prefetch does not affect execution so erase it for now. - MI.eraseFromParent(); + unsigned AS = MRI.getType(PtrReg).getAddressSpace(); + if (!AMDGPU::isFlatGlobalAddrSpace(AS) && + AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT) { + MI.eraseFromParent(); + return; + } + applyDefaultMapping(OpdMapper); return; } default: @@ -3667,7 +3811,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_AND: case AMDGPU::G_OR: - case AMDGPU::G_XOR: { + case AMDGPU::G_XOR: + case AMDGPU::G_MUL: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); if (Size == 1) { const RegisterBank *DstBank @@ -3735,7 +3880,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_PTRMASK: case AMDGPU::G_ADD: case AMDGPU::G_SUB: - case AMDGPU::G_MUL: case AMDGPU::G_SHL: case AMDGPU::G_LSHR: case AMDGPU::G_ASHR: @@ -3753,6 +3897,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_SHUFFLE_VECTOR: case AMDGPU::G_SBFX: case AMDGPU::G_UBFX: + case AMDGPU::G_AMDGPU_S_MUL_I64_I32: + case AMDGPU::G_AMDGPU_S_MUL_U64_U32: if (isSALUMapping(MI)) return getDefaultMappingSOP(MI); return getDefaultMappingVOP(MI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index b5d16e70ab23..5f550b426ec0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -84,6 +84,9 @@ public: bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const; + void applyMappingSMULU64(MachineIRBuilder &B, + const OperandsMapper &OpdMapper) const; + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const; @@ -173,6 +176,8 @@ public: const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override; + bool isScalarLoadLegal(const MachineInstr &MI) const; + InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index fdc2077868cf..0f3bb3e7b0d8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -620,7 +620,8 @@ void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { AAM.registerFunctionAnalysis<AMDGPUAA>(); } -void 
AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void AMDGPUTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [this](StringRef PassName, ModulePassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 9051a61e6557..99c9db3e654a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -51,7 +51,8 @@ public: return TLOF.get(); } - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; void registerDefaultAliasAnalyses(AAManager &) override; /// Get the integer value of a null pointer in the given address space. diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index abd7e911beef..b7f043860115 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -166,6 +166,8 @@ public: ImmTyEndpgm, ImmTyWaitVDST, ImmTyWaitEXP, + ImmTyWaitVAVDst, + ImmTyWaitVMVSrc, }; // Immediate operand kind. @@ -909,6 +911,8 @@ public: bool isEndpgm() const; bool isWaitVDST() const; bool isWaitEXP() const; + bool isWaitVAVDst() const; + bool isWaitVMVSrc() const; auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { return std::bind(P, *this); @@ -1029,6 +1033,7 @@ public: } static void printImmTy(raw_ostream& OS, ImmTy Type) { + // clang-format off switch (Type) { case ImmTyNone: OS << "None"; break; case ImmTyGDS: OS << "GDS"; break; @@ -1086,7 +1091,10 @@ public: case ImmTyEndpgm: OS << "Endpgm"; break; case ImmTyWaitVDST: OS << "WaitVDST"; break; case ImmTyWaitEXP: OS << "WaitEXP"; break; + case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break; + case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break; } + // clang-format on } void print(raw_ostream &OS) const override { @@ -1857,6 +1865,9 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: case AMDGPU::OPERAND_KIMM32: case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: return &APFloat::IEEEsingle(); @@ -1871,13 +1882,10 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: - case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: - case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: - case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: case AMDGPU::OPERAND_KIMM16: return &APFloat::IEEEhalf(); @@ -2025,9 +2033,14 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const { // We allow fp literals with f16x2 operands assuming that the specified // literal goes into the lower half and the upper half is zero. We also // require that the literal may be losslessly converted to f16. - MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : - (type == MVT::v2i16)? MVT::i16 : - (type == MVT::v2f32)? 
MVT::f32 : type; + // + // For i16x2 operands, we assume that the specified literal is encoded as a + // single-precision float. This is pretty odd, but it matches SP3 and what + // happens in hardware. + MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 + : (type == MVT::v2i16) ? MVT::f32 + : (type == MVT::v2f32) ? MVT::f32 + : type; APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); @@ -3393,12 +3406,12 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) - return AMDGPU::isInlinableIntLiteralV216(Val); + return AMDGPU::isInlinableLiteralV2I16(Val); if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) - return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); + return AMDGPU::isInlinableLiteralV2F16(Val); return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); } @@ -9192,6 +9205,14 @@ bool AMDGPUOperand::isWaitVDST() const { return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); } +bool AMDGPUOperand::isWaitVAVDst() const { + return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm()); +} + +bool AMDGPUOperand::isWaitVMVSrc() const { + return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm()); +} + //===----------------------------------------------------------------------===// // VINTERP //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 43d35fa5291c..9e99d382ed9b 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -503,7 +503,6 @@ class MUBUF_Load_Pseudo <string opName, let has_vdata = !not(!or(isLds, isLdsOpc)); let mayLoad = 1; let mayStore = isLds; - let maybeAtomic = 1; let Uses = !if(!or(isLds, isLdsOpc) , [EXEC, M0], [EXEC]); let tfe = isTFE; let lds = isLds; @@ -610,7 +609,6 @@ class MUBUF_Store_Pseudo <string opName, getAddrName<addrKindCopy>.ret; let mayLoad = 0; let mayStore = 1; - let maybeAtomic = 1; let elements = getMUBUFElements<store_vt>.ret; let tfe = isTFE; } @@ -671,7 +669,6 @@ class MUBUF_Pseudo_Store_Lds<string opName> let LGKM_CNT = 1; let mayLoad = 1; let mayStore = 1; - let maybeAtomic = 1; let has_vdata = 0; let has_vaddr = 0; @@ -735,7 +732,6 @@ class MUBUF_Atomic_Pseudo<string opName, let has_glc = 0; let has_dlc = 0; let has_sccb = 1; - let maybeAtomic = 1; let AsmMatchConverter = "cvtMubufAtomic"; } @@ -1222,8 +1218,10 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < } // End HasD16LoadStore -def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", - int_amdgcn_buffer_wbinvl1>; +let SubtargetPredicate = isNotGFX12Plus in +def BUFFER_WBINVL1 : MUBUF_Invalidate < + "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1 +>; let SubtargetPredicate = HasAtomicFaddNoRtnInsts in defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN< diff --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td new file mode 100644 index 000000000000..4416da605981 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td @@ -0,0 +1,192 @@ +//===-- DSDIRInstructions.td - LDS/VDS Direct Instruction Definitions -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LDSDIR/VDSDIR encoding (LDSDIR is gfx11, VDSDIR is gfx12+) +//===----------------------------------------------------------------------===// + +class LDSDIRe<bits<2> op, bit is_direct> : Enc32 { + // encoding fields + bits<2> attrchan; + bits<6> attr; + bits<4> waitvdst; + bits<8> vdst; + + // encoding + let Inst{31-24} = 0xce; // encoding + let Inst{23-22} = 0x0; // reserved + let Inst{21-20} = op; + let Inst{19-16} = waitvdst; + let Inst{15-10} = !if(is_direct, ?, attr); + let Inst{9-8} = !if(is_direct, ?, attrchan); + let Inst{7-0} = vdst; +} + +class VDSDIRe<bits<2> op, bit is_direct> : Enc32 { + // encoding fields + bits<2> attrchan; + bits<6> attr; + bits<4> waitvdst; + bits<8> vdst; + bits<1> waitvsrc; + + // encoding + let Inst{31-24} = 0xce; // encoding + let Inst{23} = waitvsrc; + let Inst{22} = 0x0; // reserved + let Inst{21-20} = op; + let Inst{19-16} = waitvdst; + let Inst{15-10} = !if(is_direct, ?, attr); + let Inst{9-8} = !if(is_direct, ?, attrchan); + let Inst{7-0} = vdst; +} + +//===----------------------------------------------------------------------===// +// LDSDIR/VDSDIR Classes +//===----------------------------------------------------------------------===// + +class LDSDIR_getIns<bit direct> { + dag ret = !if(direct, + (ins wait_vdst:$waitvdst), + (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst) + ); +} + +class VDSDIR_getIns<bit direct> { + dag ret = !if(direct, + (ins wait_va_vdst:$waitvdst, wait_va_vsrc:$waitvsrc), + (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_va_vdst:$waitvdst, + wait_va_vsrc:$waitvsrc) + ); +} + +class DSDIR_Common<string opName, string asm = "", dag ins, bit direct> : + InstSI<(outs VGPR_32:$vdst), ins, asm> { + let LDSDIR = 1; + let EXP_CNT = 1; + + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 0; + let maybeAtomic = 0; + + string Mnemonic = opName; + let UseNamedOperandTable = 1; + + let Uses = [M0, EXEC]; + let DisableWQM = 0; + let SchedRW = [WriteLDS]; + + bit is_direct; + let is_direct = direct; +} + +class DSDIR_Pseudo<string opName, dag ins, bit direct> : + DSDIR_Common<opName, "", ins, direct>, + SIMCInstr<opName, SIEncodingFamily.NONE> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + +class LDSDIR_getAsm<bit direct> { + string ret = !if(direct, + " $vdst$waitvdst", + " $vdst, $attr$attrchan$waitvdst" + ); +} + +class VDSDIR_getAsm<bit direct> { + string ret = !if(direct, + " $vdst$waitvdst$waitvsrc", + " $vdst, $attr$attrchan$waitvdst$waitvsrc" + ); +} + +class DSDIR_Real<DSDIR_Pseudo lds, dag ins, string asm, int subtarget> : + DSDIR_Common<lds.Mnemonic, + lds.Mnemonic # asm, + ins, + lds.is_direct>, + SIMCInstr <lds.Mnemonic, subtarget> { + let isPseudo = 0; + let isCodeGenOnly = 0; +} + +//===----------------------------------------------------------------------===// +// LDS/VDS Direct Instructions +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGFX11Only in { + +def LDS_DIRECT_LOAD : DSDIR_Pseudo<"lds_direct_load", LDSDIR_getIns<1>.ret, 1>; +def LDS_PARAM_LOAD : DSDIR_Pseudo<"lds_param_load", LDSDIR_getIns<0>.ret, 0>; + +def : GCNPat < + (f32 (int_amdgcn_lds_direct_load M0)), + (LDS_DIRECT_LOAD 0) +>; + +def : GCNPat < 
+ (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), + (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0) +>; + +} // End SubtargetPredicate = isGFX11Only + +let SubtargetPredicate = isGFX12Plus in { + +def DS_DIRECT_LOAD : DSDIR_Pseudo<"ds_direct_load", VDSDIR_getIns<1>.ret, 1>; +def DS_PARAM_LOAD : DSDIR_Pseudo<"ds_param_load", VDSDIR_getIns<0>.ret, 0>; + +def : GCNPat < + (f32 (int_amdgcn_lds_direct_load M0)), + (DS_DIRECT_LOAD 0, 1) +>; + +def : GCNPat < + (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), + (DS_PARAM_LOAD timm:$attr, timm:$attrchan, 0, 1) +>; + +} // End SubtargetPredicate = isGFX12Only + +//===----------------------------------------------------------------------===// +// GFX11 +//===----------------------------------------------------------------------===// + +multiclass DSDIR_Real_gfx11<bits<2> op, + DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> { + def _gfx11 : DSDIR_Real<lds, lds.InOperandList, + LDSDIR_getAsm<lds.is_direct>.ret, + SIEncodingFamily.GFX11>, + LDSDIRe<op, lds.is_direct> { + let AssemblerPredicate = isGFX11Only; + let DecoderNamespace = "GFX11"; + } +} + +defm LDS_PARAM_LOAD : DSDIR_Real_gfx11<0x0>; +defm LDS_DIRECT_LOAD : DSDIR_Real_gfx11<0x1>; + +//===----------------------------------------------------------------------===// +// GFX12+ +//===----------------------------------------------------------------------===// + +multiclass DSDIR_Real_gfx12<bits<2> op, + DSDIR_Pseudo lds = !cast<DSDIR_Pseudo>(NAME)> { + def _gfx12 : DSDIR_Real<lds, lds.InOperandList, + VDSDIR_getAsm<lds.is_direct>.ret, + SIEncodingFamily.GFX12>, + VDSDIRe<op, lds.is_direct> { + let AssemblerPredicate = isGFX12Plus; + let DecoderNamespace = "GFX12"; + } +} + +defm DS_PARAM_LOAD : DSDIR_Real_gfx12<0x0>; +defm DS_DIRECT_LOAD : DSDIR_Real_gfx12<0x1>; diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index bc9049b4ef33..3cccd8c50e66 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -19,7 +19,6 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt // Most instruction load and store data, so set this as the default. let mayLoad = 1; let mayStore = 1; - let maybeAtomic = 1; let hasSideEffects = 0; let SchedRW = [WriteLDS]; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 67be7b0fd642..9dff3f6c2efd 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -182,6 +182,9 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \ false, ImmWidth) +#define DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(Name, OpWidth, ImmWidth) \ + DECODE_SrcOp(decodeOperand_##Name, 9, OpWidth, Imm, false, ImmWidth) + // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc) // and decode using 'enum10' from decodeSrcOp. 
#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \ @@ -262,6 +265,9 @@ DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64) DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32) DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2I16, OPW32, 32) +DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2F16, OPW32, 16) + DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64) DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32) DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64) diff --git a/llvm/lib/Target/AMDGPU/EXPInstructions.td b/llvm/lib/Target/AMDGPU/EXPInstructions.td index ff1d661ef6fe..4cfee7d013ef 100644 --- a/llvm/lib/Target/AMDGPU/EXPInstructions.td +++ b/llvm/lib/Target/AMDGPU/EXPInstructions.td @@ -20,6 +20,7 @@ class EXPCommon<bit row, bit done, string asm = ""> : InstSI< let EXP_CNT = 1; let mayLoad = done; let mayStore = 1; + let maybeAtomic = 0; let UseNamedOperandTable = 1; let Uses = !if(row, [EXEC, M0], [EXEC]); let SchedRW = [WriteExport]; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 615f8cd54d8f..16a8b770e057 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -60,6 +60,7 @@ class FLAT_Pseudo<string opName, dag outs, dag ins, bits<1> has_sve = 0; // Scratch VGPR Enable bits<1> lds = 0; bits<1> sve = 0; + bits<1> has_offset = 1; let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); @@ -182,7 +183,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : let Inst{51-50} = cpol{4-3}; // scope let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?); let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?); - let Inst{95-72} = offset; + let Inst{95-72} = !if(ps.has_offset, offset, ?); } class GlobalSaddrTable <bit is_saddr, string Name = ""> { @@ -214,7 +215,6 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass, let has_saddr = HasSaddr; let enabled_saddr = EnableSaddr; let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); - let maybeAtomic = 1; let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); @@ -236,7 +236,6 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, let has_saddr = HasSaddr; let enabled_saddr = EnableSaddr; let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); - let maybeAtomic = 1; } multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> { @@ -262,7 +261,6 @@ class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass, let has_vaddr = 0; let has_saddr = 1; let enabled_saddr = EnableSaddr; - let maybeAtomic = 1; let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); @@ -329,7 +327,6 @@ class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass, let has_vaddr = 0; let has_saddr = 1; let enabled_saddr = EnableSaddr; - let maybeAtomic = 1; let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); } @@ -340,6 +337,34 @@ multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass GlobalSaddrTable<1, opName>; } +class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag> : + FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> { + + let AsmMatchConverter = ""; + + 
let hasSideEffects = 1; + let mayLoad = 0; + let mayStore = 0; + let is_flat_global = 1; + + let has_offset = 0; + let has_saddr = 0; + let enabled_saddr = 0; + let saddr_value = 0; + let has_vdst = 0; + let has_data = 0; + let has_vaddr = 0; + let has_glc = 0; + let has_dlc = 0; + let glcValue = 0; + let dlcValue = 0; + let has_sccb = 0; + let sccbValue = 0; + let has_sve = 0; + let lds = 0; + let sve = 0; +} + class FlatScratchInst <string sv_op, string mode> { string SVOp = sv_op; string Mode = mode; @@ -372,7 +397,6 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass, let has_sve = EnableSVE; let sve = EnableVaddr; let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); - let maybeAtomic = 1; let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); @@ -401,7 +425,6 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En let has_sve = EnableSVE; let sve = EnableVaddr; let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); - let maybeAtomic = 1; } multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> { @@ -491,7 +514,6 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, let has_vdst = 0; let has_sccb = 1; let sccbValue = 0; - let maybeAtomic = 1; let IsAtomicNoRet = 1; } @@ -928,6 +950,10 @@ defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwor let SubtargetPredicate = isGFX12Plus in { defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; + + def GLOBAL_INV : FLAT_Global_Invalidate_Writeback<"global_inv">; + def GLOBAL_WB : FLAT_Global_Invalidate_Writeback<"global_wb">; + def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">; } // End SubtargetPredicate = isGFX12Plus } // End is_flat_global = 1 @@ -2662,6 +2688,10 @@ defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_A defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073, "GLOBAL_ATOMIC_ORDERED_ADD_B64", "global_atomic_ordered_add_b64">; +defm GLOBAL_INV : VFLAT_Real_Base_gfx12<0x02b, "GLOBAL_INV", "global_inv">; +defm GLOBAL_WB : VFLAT_Real_Base_gfx12<0x02c, "GLOBAL_WB", "global_wb">; +defm GLOBAL_WBINV : VFLAT_Real_Base_gfx12<0x04f, "GLOBAL_WBINV", "global_wbinv">; + // ENC_VSCRATCH. 
defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; defm SCRATCH_LOAD_I8 : VSCRATCH_Real_AllAddr_gfx12<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 91a709303269..f6f37f5170a4 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -176,6 +176,7 @@ protected: bool HasGetWaveIdInst = false; bool HasSMemTimeInst = false; bool HasShaderCyclesRegister = false; + bool HasShaderCyclesHiLoRegisters = false; bool HasVOP3Literal = false; bool HasNoDataDepHazard = false; bool FlatAddressSpace = false; @@ -682,6 +683,8 @@ public: bool hasScalarAddSub64() const { return getGeneration() >= GFX12; } + bool hasScalarSMulU64() const { return getGeneration() >= GFX12; } + bool hasUnpackedD16VMem() const { return HasUnpackedD16VMem; } @@ -819,6 +822,10 @@ public: return HasShaderCyclesRegister; } + bool hasShaderCyclesHiLoRegisters() const { + return HasShaderCyclesHiLoRegisters; + } + bool hasVOP3Literal() const { return HasVOP3Literal; } @@ -1096,7 +1103,7 @@ public: bool hasDstSelForwardingHazard() const { return GFX940Insts; } // Cannot use op_sel with v_dot instructions. - bool hasDOTOpSelHazard() const { return GFX940Insts; } + bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; } // Does not have HW interlocs for VALU writing and then reading SGPRs. bool hasVDecCoExecHazard() const { diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td deleted file mode 100644 index 4956a1586774..000000000000 --- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td +++ /dev/null @@ -1,116 +0,0 @@ -//===-- LDSDIRInstructions.td - LDS Direct Instruction Definitions --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// LDSDIR encoding -//===----------------------------------------------------------------------===// - -class LDSDIRe<bits<2> op, bit is_direct> : Enc32 { - // encoding fields - bits<2> attrchan; - bits<6> attr; - bits<4> waitvdst; - bits<8> vdst; - - // encoding - let Inst{31-24} = 0xce; // encoding - let Inst{23-22} = 0x0; // reserved - let Inst{21-20} = op; - let Inst{19-16} = waitvdst; - let Inst{15-10} = !if(is_direct, ?, attr); - let Inst{9-8} = !if(is_direct, ?, attrchan); - let Inst{7-0} = vdst; -} - -//===----------------------------------------------------------------------===// -// LDSDIR Classes -//===----------------------------------------------------------------------===// - -class LDSDIR_getIns<bit direct> { - dag ret = !if(direct, - (ins wait_vdst:$waitvdst), - (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst) - ); -} - -class LDSDIR_Common<string opName, string asm = "", bit direct> : InstSI< - (outs VGPR_32:$vdst), - LDSDIR_getIns<direct>.ret, - asm> { - let LDSDIR = 1; - let EXP_CNT = 1; - - let hasSideEffects = 0; - let mayLoad = 1; - let mayStore = 0; - - string Mnemonic = opName; - let UseNamedOperandTable = 1; - - let Uses = [M0, EXEC]; - let DisableWQM = 0; - let SchedRW = [WriteLDS]; - - bit is_direct; - let is_direct = direct; -} - -class LDSDIR_Pseudo<string opName, bit direct> : - LDSDIR_Common<opName, "", direct>, - SIMCInstr<opName, SIEncodingFamily.NONE> { - let isPseudo = 1; - let isCodeGenOnly = 1; -} - -class LDSDIR_getAsm<bit direct> { - string ret = !if(direct, - " $vdst$waitvdst", - " $vdst, $attr$attrchan$waitvdst" - ); -} - -class LDSDIR_Real<bits<2> op, LDSDIR_Pseudo lds, int subtarget> : - LDSDIR_Common<lds.Mnemonic, - lds.Mnemonic # LDSDIR_getAsm<lds.is_direct>.ret, - lds.is_direct>, - SIMCInstr <lds.Mnemonic, subtarget>, - LDSDIRe<op, lds.is_direct> { - let isPseudo = 0; - let isCodeGenOnly = 0; -} - -//===----------------------------------------------------------------------===// -// LDS Direct Instructions -//===----------------------------------------------------------------------===// - -def LDS_DIRECT_LOAD : LDSDIR_Pseudo<"lds_direct_load", 1>; -def LDS_PARAM_LOAD : LDSDIR_Pseudo<"lds_param_load", 0>; - -def : GCNPat < - (f32 (int_amdgcn_lds_direct_load M0)), - (LDS_DIRECT_LOAD 0) ->; - -def : GCNPat < - (f32 (int_amdgcn_lds_param_load timm:$attrchan, timm:$attr, M0)), - (LDS_PARAM_LOAD timm:$attr, timm:$attrchan, 0) ->; - -//===----------------------------------------------------------------------===// -// GFX11+ -//===----------------------------------------------------------------------===// - -multiclass LDSDIR_Real_gfx11<bits<2> op, LDSDIR_Pseudo lds = !cast<LDSDIR_Pseudo>(NAME)> { - def _gfx11 : LDSDIR_Real<op, lds, SIEncodingFamily.GFX11> { - let AssemblerPredicate = isGFX11Plus; - let DecoderNamespace = "GFX11"; - } -} - -defm LDS_PARAM_LOAD : LDSDIR_Real_gfx11<0x0>; -defm LDS_DIRECT_LOAD : LDSDIR_Real_gfx11<0x1>; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index edc244db613d..6c7977e22599 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -460,56 +460,84 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm, } 
} -void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, - const MCSubtargetInfo &STI, - raw_ostream &O) { - int16_t SImm = static_cast<int16_t>(Imm); - if (isInlinableIntLiteral(SImm)) { - O << SImm; - return; - } - +// This must accept a 32-bit immediate value to correctly handle packed 16-bit +// operations. +static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O) { if (Imm == 0x3C00) - O<< "1.0"; + O << "1.0"; else if (Imm == 0xBC00) - O<< "-1.0"; + O << "-1.0"; else if (Imm == 0x3800) - O<< "0.5"; + O << "0.5"; else if (Imm == 0xB800) - O<< "-0.5"; + O << "-0.5"; else if (Imm == 0x4000) - O<< "2.0"; + O << "2.0"; else if (Imm == 0xC000) - O<< "-2.0"; + O << "-2.0"; else if (Imm == 0x4400) - O<< "4.0"; + O << "4.0"; else if (Imm == 0xC400) - O<< "-4.0"; - else if (Imm == 0x3118 && - STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) { + O << "-4.0"; + else if (Imm == 0x3118 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) O << "0.15915494"; - } else { - uint64_t Imm16 = static_cast<uint16_t>(Imm); - O << formatHex(Imm16); - } -} + else + return false; -void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, - const MCSubtargetInfo &STI, - raw_ostream &O) { - uint16_t Lo16 = static_cast<uint16_t>(Imm); - printImmediate16(Lo16, STI, O); + return true; } -void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, +void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O) { + int16_t SImm = static_cast<int16_t>(Imm); + if (isInlinableIntLiteral(SImm)) { + O << SImm; + return; + } + + uint16_t HImm = static_cast<uint16_t>(Imm); + if (printImmediateFloat16(HImm, STI, O)) + return; + + uint64_t Imm16 = static_cast<uint16_t>(Imm); + O << formatHex(Imm16); +} + +void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType, + const MCSubtargetInfo &STI, + raw_ostream &O) { int32_t SImm = static_cast<int32_t>(Imm); - if (SImm >= -16 && SImm <= 64) { + if (isInlinableIntLiteral(SImm)) { O << SImm; return; } + switch (OpType) { + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + if (printImmediateFloat32(Imm, STI, O)) + return; + break; + case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + if (isUInt<16>(Imm) && + printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O)) + return; + break; + default: + llvm_unreachable("bad operand type"); + } + + O << formatHex(static_cast<uint64_t>(Imm)); +} + +bool AMDGPUInstPrinter::printImmediateFloat32(uint32_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { if (Imm == llvm::bit_cast<uint32_t>(0.0f)) O << "0.0"; else if (Imm == llvm::bit_cast<uint32_t>(1.0f)) @@ -532,7 +560,24 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) O << "0.15915494"; else - O << formatHex(static_cast<uint64_t>(Imm)); + return false; + + return true; +} + +void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { + int32_t SImm = static_cast<int32_t>(Imm); + if (isInlinableIntLiteral(SImm)) { + O << SImm; + return; + } + + if (printImmediateFloat32(Imm, STI, O)) + return; + + O << formatHex(static_cast<uint64_t>(Imm)); } void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, @@ -639,6 +684,20 @@ void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo, printU4ImmDecOperand(MI, OpNo, O); } +void 
AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " wait_va_vdst:"; + printU4ImmDecOperand(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " wait_vm_vsrc:"; + printU4ImmDecOperand(MI, OpNo, O); +} + void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -741,25 +800,11 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo, break; case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: - if (!isUInt<16>(Op.getImm()) && - STI.hasFeature(AMDGPU::FeatureVOP3Literal)) { - printImmediate32(Op.getImm(), STI, O); - break; - } - - // Deal with 16-bit FP inline immediates not working. - if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) { - printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O); - break; - } - [[fallthrough]]; case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: - printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O); - break; case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: - printImmediateV216(Op.getImm(), STI, O); + printImmediateV216(Op.getImm(), OpTy, STI, O); break; case MCOI::OPERAND_UNKNOWN: case MCOI::OPERAND_PCREL: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 95c26de6299e..e3958f88277d 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -88,8 +88,10 @@ private: raw_ostream &O); void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); - void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI, - raw_ostream &O); + void printImmediateV216(uint32_t Imm, uint8_t OpType, + const MCSubtargetInfo &STI, raw_ostream &O); + bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI, @@ -161,6 +163,10 @@ private: raw_ostream &O); void printWaitEXP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitVAVDst(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printWaitVMVSrc(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printExpSrcN(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, unsigned N); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index b403d69d9ff1..de1abaf29c56 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -284,22 +284,15 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO, // which does not have f16 support? 
return getLit16Encoding(static_cast<uint16_t>(Imm), STI); case AMDGPU::OPERAND_REG_IMM_V2INT16: - case AMDGPU::OPERAND_REG_IMM_V2FP16: { - if (!isUInt<16>(Imm) && STI.hasFeature(AMDGPU::FeatureVOP3Literal)) - return getLit32Encoding(static_cast<uint32_t>(Imm), STI); - if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) - return getLit16Encoding(static_cast<uint16_t>(Imm), STI); - [[fallthrough]]; - } case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: - return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI); + return AMDGPU::getInlineEncodingV2I16(static_cast<uint32_t>(Imm)) + .value_or(255); + case AMDGPU::OPERAND_REG_IMM_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: - case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { - uint16_t Lo16 = static_cast<uint16_t>(Imm); - uint32_t Encoding = getLit16Encoding(Lo16, STI); - return Encoding; - } + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm)) + .value_or(255); case AMDGPU::OPERAND_KIMM32: case AMDGPU::OPERAND_KIMM16: return MO.getImm(); diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 9a2fb0bc37b2..674fd04f2fc1 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1651,7 +1651,7 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = Swz[i]->getAsZExtVal(); if (SwizzleRemap.contains(Idx)) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); } @@ -1659,7 +1659,7 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SwizzleRemap.clear(); BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = Swz[i]->getAsZExtVal(); if (SwizzleRemap.contains(Idx)) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); } @@ -1780,7 +1780,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, // Check that we know which element is being inserted if (!isa<ConstantSDNode>(EltNo)) return SDValue(); - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + unsigned Elt = EltNo->getAsZExtVal(); // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the @@ -2021,7 +2021,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx, } case R600::MOV_IMM_GLOBAL_ADDR: // Check if the Imm slot is used. Taken from below. 
- if (cast<ConstantSDNode>(Imm)->getZExtValue()) + if (Imm->getAsZExtVal()) return false; Imm = Src.getOperand(0); Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 709de612d81d..aa7639a0f186 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -208,9 +208,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const { assert(Old.isReg() && Fold.isImm()); if (!(TSFlags & SIInstrFlags::IsPacked) || (TSFlags & SIInstrFlags::IsMAI) || - (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT)) || - isUInt<16>(Fold.ImmToFold) || - !AMDGPU::isFoldableLiteralV216(Fold.ImmToFold, ST->hasInv2PiInlineImm())) + (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT))) return false; unsigned Opcode = MI->getOpcode(); @@ -234,42 +232,123 @@ bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const { MachineOperand &Old = MI->getOperand(Fold.UseOpNo); unsigned Opcode = MI->getOpcode(); int OpNo = MI->getOperandNo(&Old); + uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType; + + // If the literal can be inlined as-is, apply it and short-circuit the + // tests below. The main motivation for this is to avoid unintuitive + // uses of opsel. + if (AMDGPU::isInlinableLiteralV216(Fold.ImmToFold, OpType)) { + Old.ChangeToImmediate(Fold.ImmToFold); + return true; + } - // Set op_sel/op_sel_hi on this operand or bail out if op_sel is - // already set. + // Refer to op_sel/op_sel_hi and check if we can change the immediate and + // op_sel in a way that allows an inline constant. int ModIdx = -1; - if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) + unsigned SrcIdx = ~0; + if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) { ModIdx = AMDGPU::OpName::src0_modifiers; - else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) + SrcIdx = 0; + } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) { ModIdx = AMDGPU::OpName::src1_modifiers; - else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) + SrcIdx = 1; + } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) { ModIdx = AMDGPU::OpName::src2_modifiers; + SrcIdx = 2; + } assert(ModIdx != -1); ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx); MachineOperand &Mod = MI->getOperand(ModIdx); - unsigned Val = Mod.getImm(); - if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1)) + unsigned ModVal = Mod.getImm(); + + uint16_t ImmLo = static_cast<uint16_t>( + Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0)); + uint16_t ImmHi = static_cast<uint16_t>( + Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0)); + uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo; + unsigned NewModVal = ModVal & ~(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1); + + // Helper function that attempts to inline the given value with a newly + // chosen opsel pattern. + auto tryFoldToInline = [&](uint32_t Imm) -> bool { + if (AMDGPU::isInlinableLiteralV216(Imm, OpType)) { + Mod.setImm(NewModVal | SISrcMods::OP_SEL_1); + Old.ChangeToImmediate(Imm); + return true; + } + + // Try to shuffle the halves around and leverage opsel to get an inline + // constant. 
+ uint16_t Lo = static_cast<uint16_t>(Imm); + uint16_t Hi = static_cast<uint16_t>(Imm >> 16); + if (Lo == Hi) { + if (AMDGPU::isInlinableLiteralV216(Lo, OpType)) { + Mod.setImm(NewModVal); + Old.ChangeToImmediate(Lo); + return true; + } + + if (static_cast<int16_t>(Lo) < 0) { + int32_t SExt = static_cast<int16_t>(Lo); + if (AMDGPU::isInlinableLiteralV216(SExt, OpType)) { + Mod.setImm(NewModVal); + Old.ChangeToImmediate(SExt); + return true; + } + } + + // This check is only useful for integer instructions + if (OpType == AMDGPU::OPERAND_REG_IMM_V2INT16 || + OpType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16) { + if (AMDGPU::isInlinableLiteralV216(Lo << 16, OpType)) { + Mod.setImm(NewModVal | SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1); + Old.ChangeToImmediate(static_cast<uint32_t>(Lo) << 16); + return true; + } + } + } else { + uint32_t Swapped = (static_cast<uint32_t>(Lo) << 16) | Hi; + if (AMDGPU::isInlinableLiteralV216(Swapped, OpType)) { + Mod.setImm(NewModVal | SISrcMods::OP_SEL_0); + Old.ChangeToImmediate(Swapped); + return true; + } + } + return false; + }; - // Only apply the following transformation if that operand requires - // a packed immediate. - // If upper part is all zero we do not need op_sel_hi. - if (!(Fold.ImmToFold & 0xffff)) { - MachineOperand New = - MachineOperand::CreateImm((Fold.ImmToFold >> 16) & 0xffff); - if (!TII->isOperandLegal(*MI, OpNo, &New)) - return false; - Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); - Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); - Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); + if (tryFoldToInline(Imm)) return true; + + // Replace integer addition by subtraction and vice versa if it allows + // folding the immediate to an inline constant. + // + // We should only ever get here for SrcIdx == 1 due to canonicalization + // earlier in the pipeline, but we double-check here to be safe / fully + // general. + bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16; + bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16; + if (SrcIdx == 1 && (IsUAdd || IsUSub)) { + unsigned ClampIdx = + AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp); + bool Clamp = MI->getOperand(ClampIdx).getImm() != 0; + + if (!Clamp) { + uint16_t NegLo = -static_cast<uint16_t>(Imm); + uint16_t NegHi = -static_cast<uint16_t>(Imm >> 16); + uint32_t NegImm = (static_cast<uint32_t>(NegHi) << 16) | NegLo; + + if (tryFoldToInline(NegImm)) { + unsigned NegOpcode = + IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16; + MI->setDesc(TII->get(NegOpcode)); + return true; + } + } } - MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold & 0xffff); - if (!TII->isOperandLegal(*MI, OpNo, &New)) - return false; - Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); - Old.ChangeToImmediate(Fold.ImmToFold & 0xffff); - return true; + + return false; } bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { @@ -277,8 +356,19 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { MachineOperand &Old = MI->getOperand(Fold.UseOpNo); assert(Old.isReg()); - if (Fold.isImm() && canUseImmWithOpSel(Fold)) - return tryFoldImmWithOpSel(Fold); + if (Fold.isImm() && canUseImmWithOpSel(Fold)) { + if (tryFoldImmWithOpSel(Fold)) + return true; + + // We can't represent the candidate as an inline constant. Try as a literal + // with the original opsel, checking constant bus limitations. 
+ MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold); + int OpNo = MI->getOperandNo(&Old); + if (!TII->isOperandLegal(*MI, OpNo, &New)) + return false; + Old.ChangeToImmediate(Fold.ImmToFold); + return true; + } if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) { MachineBasicBlock *MBB = MI->getParent(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0e857e6ac71b..6ddc7e864fb2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -151,22 +151,29 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, if (Subtarget->useRealTrue16Insts()) { addRegisterClass(MVT::i16, &AMDGPU::VGPR_16RegClass); addRegisterClass(MVT::f16, &AMDGPU::VGPR_16RegClass); + addRegisterClass(MVT::bf16, &AMDGPU::VGPR_16RegClass); } else { addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::bf16, &AMDGPU::SReg_32RegClass); } // Unless there are also VOP3P operations, not operations are really legal. addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::v2bf16, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::v4bf16, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass); addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass); + addRegisterClass(MVT::v8bf16, &AMDGPU::SGPR_128RegClass); addRegisterClass(MVT::v16i16, &AMDGPU::SGPR_256RegClass); addRegisterClass(MVT::v16f16, &AMDGPU::SGPR_256RegClass); + addRegisterClass(MVT::v16bf16, &AMDGPU::SGPR_256RegClass); addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass); addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass); + addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass); } addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass); @@ -196,6 +203,41 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, MVT::i1, MVT::v32i32}, Custom); + if (isTypeLegal(MVT::bf16)) { + for (unsigned Opc : + {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, + ISD::FREM, ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FSQRT, ISD::FCBRT, + ISD::FSIN, ISD::FCOS, ISD::FPOW, ISD::FPOWI, + ISD::FLDEXP, ISD::FFREXP, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, + ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT, + ISD::FROUND, ISD::FROUNDEVEN, ISD::FFLOOR, ISD::FCANONICALIZE, + ISD::SETCC}) { + // FIXME: The promoted to type shouldn't need to be explicit + setOperationAction(Opc, MVT::bf16, Promote); + AddPromotedToType(Opc, MVT::bf16, MVT::f32); + } + + setOperationAction(ISD::FP_ROUND, MVT::bf16, Expand); + + setOperationAction(ISD::SELECT, MVT::bf16, Promote); + AddPromotedToType(ISD::SELECT, MVT::bf16, MVT::i16); + + // TODO: Could make these legal + setOperationAction(ISD::FABS, MVT::bf16, Expand); + setOperationAction(ISD::FNEG, MVT::bf16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); + + // We only need to custom lower because we can't specify an action for bf16 + // sources. 
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, MVT::v2bf16, MVT::v2i16); + } + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); @@ -271,13 +313,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. for (MVT VT : - {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32, - MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32, - MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16, - MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32, - MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16, - MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v16i64, MVT::v16f64, - MVT::v32i32, MVT::v32f32, MVT::v32i16, MVT::v32f16}) { + {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32, + MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32, + MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16, + MVT::v4f16, MVT::v4bf16, MVT::v3i64, MVT::v3f64, MVT::v6i32, + MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, + MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16, MVT::v16f16, + MVT::v16bf16, MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32, + MVT::v32i16, MVT::v32f16, MVT::v32bf16}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: @@ -383,13 +426,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}, Expand); - setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16}, Custom); + setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16, MVT::v4bf16}, + Custom); // Avoid stack access for these. // TODO: Generalize to more vector types. setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}, - {MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8, - MVT::v4i16, MVT::v4f16}, + {MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::v2i8, MVT::v4i8, + MVT::v8i8, MVT::v4i16, MVT::v4f16, MVT::v4bf16}, Custom); // Deal with vec3 vector operations when widened to vec4. @@ -498,6 +542,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::BF16_TO_FP, {MVT::i16, MVT::f32, MVT::f64}, Expand); setOperationAction(ISD::FP_TO_BF16, {MVT::i16, MVT::f32, MVT::f64}, Expand); + // Custom lower these because we can't specify a rule based on an illegal + // source bf16. + setOperationAction({ISD::FP_EXTEND, ISD::STRICT_FP_EXTEND}, MVT::f32, Custom); + setOperationAction({ISD::FP_EXTEND, ISD::STRICT_FP_EXTEND}, MVT::f64, Custom); + if (Subtarget->has16BitInsts()) { setOperationAction({ISD::Constant, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::UADDSAT, ISD::USUBSAT}, @@ -524,9 +573,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32); setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::i16, Custom); + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom); + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom); + + setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i32, Custom); // F16 - Constant Actions. 
setOperationAction(ISD::ConstantFP, MVT::f16, Legal); + setOperationAction(ISD::ConstantFP, MVT::bf16, Legal); // F16 - Load/Store Actions. setOperationAction(ISD::LOAD, MVT::f16, Promote); @@ -534,16 +588,23 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, MVT::f16, Promote); AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16); + // BF16 - Load/Store Actions. + setOperationAction(ISD::LOAD, MVT::bf16, Promote); + AddPromotedToType(ISD::LOAD, MVT::bf16, MVT::i16); + setOperationAction(ISD::STORE, MVT::bf16, Promote); + AddPromotedToType(ISD::STORE, MVT::bf16, MVT::i16); + // F16 - VOP1 Actions. setOperationAction({ISD::FP_ROUND, ISD::STRICT_FP_ROUND, ISD::FCOS, ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND}, MVT::f16, Custom); - setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom); setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote); + setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::bf16, Promote); // F16 - VOP2 Actions. - setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand); + setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, {MVT::f16, MVT::bf16}, + Expand); setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, MVT::f16, Custom); setOperationAction(ISD::FFREXP, MVT::f16, Custom); setOperationAction(ISD::FDIV, MVT::f16, Custom); @@ -554,8 +615,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMAD, MVT::f16, Legal); for (MVT VT : - {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16, - MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v32i16, MVT::v32f16}) { + {MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::v4i16, MVT::v4f16, + MVT::v4bf16, MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16, + MVT::v16f16, MVT::v16bf16, MVT::v32i16, MVT::v32f16}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: @@ -587,7 +649,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // XXX - Do these do anything? Vector constants turn into build_vector. 
setOperationAction(ISD::Constant, {MVT::v2i16, MVT::v2f16}, Legal); - setOperationAction(ISD::UNDEF, {MVT::v2i16, MVT::v2f16}, Legal); + setOperationAction(ISD::UNDEF, {MVT::v2i16, MVT::v2f16, MVT::v2bf16}, + Legal); setOperationAction(ISD::STORE, MVT::v2i16, Promote); AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32); @@ -610,16 +673,22 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v2i32); setOperationAction(ISD::LOAD, MVT::v4f16, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f16, MVT::v2i32); + setOperationAction(ISD::LOAD, MVT::v4bf16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v4bf16, MVT::v2i32); setOperationAction(ISD::STORE, MVT::v4i16, Promote); AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::v2i32); setOperationAction(ISD::STORE, MVT::v4f16, Promote); AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32); + setOperationAction(ISD::STORE, MVT::v4bf16, Promote); + AddPromotedToType(ISD::STORE, MVT::v4bf16, MVT::v2i32); setOperationAction(ISD::LOAD, MVT::v8i16, Promote); AddPromotedToType(ISD::LOAD, MVT::v8i16, MVT::v4i32); setOperationAction(ISD::LOAD, MVT::v8f16, Promote); AddPromotedToType(ISD::LOAD, MVT::v8f16, MVT::v4i32); + setOperationAction(ISD::LOAD, MVT::v8bf16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v8bf16, MVT::v4i32); setOperationAction(ISD::STORE, MVT::v4i16, Promote); AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::v2i32); @@ -630,26 +699,36 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, AddPromotedToType(ISD::STORE, MVT::v8i16, MVT::v4i32); setOperationAction(ISD::STORE, MVT::v8f16, Promote); AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32); + setOperationAction(ISD::STORE, MVT::v8bf16, Promote); + AddPromotedToType(ISD::STORE, MVT::v8bf16, MVT::v4i32); setOperationAction(ISD::LOAD, MVT::v16i16, Promote); AddPromotedToType(ISD::LOAD, MVT::v16i16, MVT::v8i32); setOperationAction(ISD::LOAD, MVT::v16f16, Promote); AddPromotedToType(ISD::LOAD, MVT::v16f16, MVT::v8i32); + setOperationAction(ISD::LOAD, MVT::v16bf16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v16bf16, MVT::v8i32); setOperationAction(ISD::STORE, MVT::v16i16, Promote); AddPromotedToType(ISD::STORE, MVT::v16i16, MVT::v8i32); setOperationAction(ISD::STORE, MVT::v16f16, Promote); AddPromotedToType(ISD::STORE, MVT::v16f16, MVT::v8i32); + setOperationAction(ISD::STORE, MVT::v16bf16, Promote); + AddPromotedToType(ISD::STORE, MVT::v16bf16, MVT::v8i32); setOperationAction(ISD::LOAD, MVT::v32i16, Promote); AddPromotedToType(ISD::LOAD, MVT::v32i16, MVT::v16i32); setOperationAction(ISD::LOAD, MVT::v32f16, Promote); AddPromotedToType(ISD::LOAD, MVT::v32f16, MVT::v16i32); + setOperationAction(ISD::LOAD, MVT::v32bf16, Promote); + AddPromotedToType(ISD::LOAD, MVT::v32bf16, MVT::v16i32); setOperationAction(ISD::STORE, MVT::v32i16, Promote); AddPromotedToType(ISD::STORE, MVT::v32i16, MVT::v16i32); setOperationAction(ISD::STORE, MVT::v32f16, Promote); AddPromotedToType(ISD::STORE, MVT::v32f16, MVT::v16i32); + setOperationAction(ISD::STORE, MVT::v32bf16, Promote); + AddPromotedToType(ISD::STORE, MVT::v32bf16, MVT::v16i32); setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND}, MVT::v2i32, Expand); @@ -662,7 +741,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, MVT::v8i32, Expand); if (!Subtarget->hasVOP3PInsts()) - setOperationAction(ISD::BUILD_VECTOR, {MVT::v2i16, MVT::v2f16}, Custom); + setOperationAction(ISD::BUILD_VECTOR, + {MVT::v2i16, MVT::v2f16, MVT::v2bf16}, Custom); 
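// Illustrative sketch (not from the patch) of why the bf16 actions above can
// be promoted to f32: bf16 is the high half of an IEEE-754 binary32, so the
// extend is a 16-bit shift and the narrowing truncation only needs a
// round-to-nearest-even bias. Helper names are invented for the example;
// NaN handling is omitted.
#include <cstdint>
#include <cstring>

static float BF16ToF32(uint16_t B) {
  uint32_t Bits = static_cast<uint32_t>(B) << 16; // bf16 = top 16 bits of f32
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

static uint16_t F32ToBF16(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  Bits += 0x7FFF + ((Bits >> 16) & 1); // round to nearest even, then truncate
  return static_cast<uint16_t>(Bits >> 16);
}

// A promoted bf16 add: widen both operands, add in f32, narrow the result.
static uint16_t BF16Add(uint16_t A, uint16_t B) {
  return F32ToBF16(BF16ToF32(A) + BF16ToF32(B));
}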
setOperationAction(ISD::FNEG, MVT::v2f16, Legal); // This isn't really legal, but this avoids the legalizer unrolling it (and @@ -680,8 +760,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16}, Expand); - for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16, - MVT::v32i16, MVT::v32f16}) { + for (MVT Vec16 : + {MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16, MVT::v16f16, + MVT::v16bf16, MVT::v32i16, MVT::v32f16, MVT::v32bf16}) { setOperationAction( {ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR}, Vec16, Custom); @@ -699,7 +780,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, ISD::FMAXNUM_IEEE, ISD::FCANONICALIZE}, MVT::v2f16, Legal); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2i16, MVT::v2f16}, + setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2i16, MVT::v2f16, MVT::v2bf16}, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, @@ -724,7 +805,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, Custom); setOperationAction(ISD::FEXP, MVT::v2f16, Custom); - setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16}, Custom); + setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16, MVT::v4bf16}, + Custom); if (Subtarget->hasPackedFP32Ops()) { setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FNEG}, @@ -750,13 +832,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, } setOperationAction(ISD::SELECT, - {MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8, - MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16, - MVT::v32i16, MVT::v32f16}, + {MVT::v4i16, MVT::v4f16, MVT::v4bf16, MVT::v2i8, MVT::v4i8, + MVT::v8i8, MVT::v8i16, MVT::v8f16, MVT::v8bf16, + MVT::v16i16, MVT::v16f16, MVT::v16bf16, MVT::v32i16, + MVT::v32f16, MVT::v32bf16}, Custom); setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom); + if (Subtarget->hasScalarSMulU64()) + setOperationAction(ISD::MUL, MVT::i64, Custom); + if (Subtarget->hasMad64_32()) setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom); @@ -3902,6 +3988,26 @@ SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const { return Op; } +// Work around DAG legality rules only based on the result type. +SDValue SITargetLowering::lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + bool IsStrict = Op.getOpcode() == ISD::STRICT_FP_EXTEND; + SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + EVT SrcVT = Src.getValueType(); + + if (SrcVT.getScalarType() != MVT::bf16) + return Op; + + SDLoc SL(Op); + SDValue BitCast = + DAG.getNode(ISD::BITCAST, SL, SrcVT.changeTypeToInteger(), Src); + + EVT DstVT = Op.getValueType(); + if (IsStrict) + llvm_unreachable("Need STRICT_BF16_TO_FP"); + + return DAG.getNode(ISD::BF16_TO_FP, SL, DstVT, BitCast); +} + Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const { Register Reg = StringSwitch<Register>(RegName) @@ -4825,6 +4931,48 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MI.eraseFromParent(); return BB; } + case AMDGPU::GET_SHADERCYCLESHILO: { + assert(MF->getSubtarget<GCNSubtarget>().hasShaderCyclesHiLoRegisters()); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const DebugLoc &DL = MI.getDebugLoc(); + // The algorithm is: + // + // hi1 = getreg(SHADER_CYCLES_HI) + // lo1 = getreg(SHADER_CYCLES_LO) + // hi2 = getreg(SHADER_CYCLES_HI) + // + // If hi1 == hi2 then there was no overflow and the result is hi2:lo1. 
+ // Otherwise there was overflow and the result is hi2:0. In both cases the + // result should represent the actual time at some point during the sequence + // of three getregs. + Register RegHi1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegHi1) + .addImm(AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES_HI, + 0, 32)); + Register RegLo1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegLo1) + .addImm( + AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES, 0, 32)); + Register RegHi2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegHi2) + .addImm(AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES_HI, + 0, 32)); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CMP_EQ_U32)) + .addReg(RegHi1) + .addReg(RegHi2); + Register RegLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CSELECT_B32), RegLo) + .addReg(RegLo1) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE)) + .add(MI.getOperand(0)) + .addReg(RegLo) + .addImm(AMDGPU::sub0) + .addReg(RegHi2) + .addImm(AMDGPU::sub1); + MI.eraseFromParent(); + return BB; + } case AMDGPU::SI_INDIRECT_SRC_V1: case AMDGPU::SI_INDIRECT_SRC_V2: case AMDGPU::SI_INDIRECT_SRC_V4: @@ -5305,7 +5453,9 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op, assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 || VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 || - VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16); + VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16 || + VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v16bf16 || + VT == MVT::v32bf16); SDValue Lo0, Hi0; SDValue Op0 = Op.getOperand(0); @@ -5424,7 +5574,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL: case ISD::ADD: case ISD::SUB: - case ISD::MUL: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -5438,6 +5587,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SADDSAT: case ISD::SSUBSAT: return splitBinaryVectorOp(Op, DAG); + case ISD::MUL: + return lowerMUL(Op, DAG); case ISD::SMULO: case ISD::UMULO: return lowerXMULO(Op, DAG); @@ -5452,6 +5603,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerGET_ROUNDING(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: + return lowerFP_EXTEND(Op, DAG); } return SDValue(); } @@ -6090,6 +6244,66 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(0), TruncExp); } +// Custom lowering for vector multiplications and s_mul_u64. +SDValue SITargetLowering::lowerMUL(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + + // Split vector operands. + if (VT.isVector()) + return splitBinaryVectorOp(Op, DAG); + + assert(VT == MVT::i64 && "The following code is a special for s_mul_u64"); + + // There are four ways to lower s_mul_u64: + // + // 1. If all the operands are uniform, then we lower it as it is. + // + // 2. If the operands are divergent, then we have to split s_mul_u64 in 32-bit + // multiplications because there is not a vector equivalent of s_mul_u64. + // + // 3. 
If the cost model decides that it is more efficient to use vector + // registers, then we have to split s_mul_u64 in 32-bit multiplications. + // This happens in splitScalarSMULU64() in SIInstrInfo.cpp . + // + // 4. If the cost model decides to use vector registers and both of the + // operands are zero-extended/sign-extended from 32-bits, then we split the + // s_mul_u64 in two 32-bit multiplications. The problem is that it is not + // possible to check if the operands are zero-extended or sign-extended in + // SIInstrInfo.cpp. For this reason, here, we replace s_mul_u64 with + // s_mul_u64_u32_pseudo if both operands are zero-extended and we replace + // s_mul_u64 with s_mul_i64_i32_pseudo if both operands are sign-extended. + // If the cost model decides that we have to use vector registers, then + // splitScalarSMulPseudo() (in SIInstrInfo.cpp) split s_mul_u64_u32/ + // s_mul_i64_i32_pseudo in two vector multiplications. If the cost model + // decides that we should use scalar registers, then s_mul_u64_u32_pseudo/ + // s_mul_i64_i32_pseudo is lowered as s_mul_u64 in expandPostRAPseudo() in + // SIInstrInfo.cpp . + + if (Op->isDivergent()) + return SDValue(); + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + // If all the operands are zero-extended to 32-bits, then we replace s_mul_u64 + // with s_mul_u64_u32_pseudo. If all the operands are sign-extended to + // 32-bits, then we replace s_mul_u64 with s_mul_i64_i32_pseudo. + KnownBits Op0KnownBits = DAG.computeKnownBits(Op0); + unsigned Op0LeadingZeros = Op0KnownBits.countMinLeadingZeros(); + KnownBits Op1KnownBits = DAG.computeKnownBits(Op1); + unsigned Op1LeadingZeros = Op1KnownBits.countMinLeadingZeros(); + SDLoc SL(Op); + if (Op0LeadingZeros >= 32 && Op1LeadingZeros >= 32) + return SDValue( + DAG.getMachineNode(AMDGPU::S_MUL_U64_U32_PSEUDO, SL, VT, Op0, Op1), 0); + unsigned Op0SignBits = DAG.ComputeNumSignBits(Op0); + unsigned Op1SignBits = DAG.ComputeNumSignBits(Op1); + if (Op0SignBits >= 33 && Op1SignBits >= 33) + return SDValue( + DAG.getMachineNode(AMDGPU::S_MUL_I64_I32_PSEUDO, SL, VT, Op0, Op1), 0); + // If all the operands are uniform, then we lower s_mul_u64 as it is. 
+ return Op; +} + SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc SL(Op); @@ -6424,7 +6638,7 @@ SDValue SITargetLowering::lowerINSERT_SUBVECTOR(SDValue Op, EVT InsVT = Ins.getValueType(); EVT EltVT = VecVT.getVectorElementType(); unsigned InsNumElts = InsVT.getVectorNumElements(); - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + unsigned IdxVal = Idx->getAsZExtVal(); SDLoc SL(Op); if (EltVT.getScalarSizeInBits() == 16 && IdxVal % 2 == 0) { @@ -6639,7 +6853,7 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue BC = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec); SDValue Elt = DAG.getNode(ISD::SRL, SL, IntVT, BC, ScaledIdx); - if (ResultVT == MVT::f16) { + if (ResultVT == MVT::f16 || ResultVT == MVT::bf16) { SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Elt); return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result); } @@ -6725,8 +6939,8 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, SDLoc SL(Op); EVT VT = Op.getValueType(); - if (VT == MVT::v4i16 || VT == MVT::v4f16 || - VT == MVT::v8i16 || VT == MVT::v8f16) { + if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 || + VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), VT.getVectorNumElements() / 2); MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits()); @@ -6749,7 +6963,7 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, VT, Blend); } - if (VT == MVT::v16i16 || VT == MVT::v16f16) { + if (VT == MVT::v16i16 || VT == MVT::v16f16 || VT == MVT::v16bf16) { EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), VT.getVectorNumElements() / 4); MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits()); @@ -6770,7 +6984,7 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, VT, Blend); } - if (VT == MVT::v32i16 || VT == MVT::v32f16) { + if (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v32bf16) { EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), VT.getVectorNumElements() / 8); MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits()); @@ -6791,7 +7005,7 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, VT, Blend); } - assert(VT == MVT::v2f16 || VT == MVT::v2i16); + assert(VT == MVT::v2f16 || VT == MVT::v2i16 || VT == MVT::v2bf16); assert(!Subtarget->hasVOP3PInsts() && "this should be legal"); SDValue Lo = Op.getOperand(0); @@ -6890,6 +7104,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, // Adjust alignment for that dynamic shared memory array. Function &F = DAG.getMachineFunction().getFunction(); MFI->setDynLDSAlign(F, *cast<GlobalVariable>(GV)); + MFI->setUsesDynamicLDS(true); return SDValue( DAG.getMachineNode(AMDGPU::GET_GROUPSTATICSIZE, DL, PtrVT), 0); } @@ -7453,7 +7668,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op, Ops.push_back(IsA16 ? True : False); if (!Subtarget->hasGFX90AInsts()) { Ops.push_back(TFE); //tfe - } else if (cast<ConstantSDNode>(TFE)->getZExtValue()) { + } else if (TFE->getAsZExtVal()) { report_fatal_error("TFE is not supported on this GPU"); } if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA) @@ -7590,7 +7805,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, setBufferOffsets(Offset, DAG, &Ops[3], NumLoads > 1 ? 
Align(16 * NumLoads) : Align(4)); - uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue(); + uint64_t InstOffset = Ops[5]->getAsZExtVal(); for (unsigned i = 0; i < NumLoads; ++i) { Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32); Loads.push_back(getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Ops, @@ -14052,11 +14267,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N->getValueType(0); // v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x)) - if (VT == MVT::v2i16 || VT == MVT::v2f16) { + if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2f16) { SDLoc SL(N); SDValue Src = N->getOperand(0); EVT EltVT = Src.getValueType(); - if (EltVT == MVT::f16) + if (EltVT != MVT::i16) Src = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Src); SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Src); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 5bc091d6e84d..92b38ebade62 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -146,6 +146,7 @@ private: SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const; SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; @@ -417,6 +418,7 @@ public: SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 55ddb540c51e..1cb1d32707f2 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1424,6 +1424,12 @@ bool SIInsertWaitcnts::mayAccessScratchThroughFlat( }); } +static bool isCacheInvOrWBInst(MachineInstr &Inst) { + auto Opc = Inst.getOpcode(); + return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB || + Opc == AMDGPU::GLOBAL_WBINV; +} + void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, WaitcntBrackets *ScoreBrackets) { // Now look at the instruction opcode. If it is a memory access @@ -1439,6 +1445,10 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst); } } else if (TII->isFLAT(Inst)) { + // TODO: Track this properly. + if (isCacheInvOrWBInst(Inst)) + return; + assert(Inst.mayLoadOrStore()); int FlatASCount = 0; diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 585a3eb78618..1b66d163714f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -91,7 +91,7 @@ class InstSI <dag outs, dag ins, string asm = "", field bit VOP3_OPSEL = 0; // Is it possible for this instruction to be atomic? - field bit maybeAtomic = 0; + field bit maybeAtomic = 1; // This bit indicates that this is a VI instruction which is renamed // in GFX9. Required for correct mapping from pseudo to MC. 
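The GET_SHADERCYCLESHILO expansion above reads a 64-bit cycle counter that is only visible as two 32-bit halves: it samples HI, LO, HI and keeps LO only when the two HI samples agree, so a roll-over between the reads can never yield a torn value. A minimal host-side sketch of the same idea, with ReadHi/ReadLo standing in for the two S_GETREG_B32 reads (names invented for the example):

#include <cstdint>
#include <functional>

// Overflow-safe 64-bit read from split 32-bit halves, mirroring the
// hi1/lo1/hi2 + S_CMP_EQ_U32 + S_CSELECT_B32 sequence emitted above.
uint64_t readCycles64(const std::function<uint32_t()> &ReadHi,
                      const std::function<uint32_t()> &ReadLo) {
  uint32_t Hi1 = ReadHi();
  uint32_t Lo1 = ReadLo();
  uint32_t Hi2 = ReadHi();
  // If the high half changed while we sampled the low half, Lo1 may belong
  // to either epoch; hi2:0 is still a time within the three reads.
  uint32_t Lo = (Hi1 == Hi2) ? Lo1 : 0;
  return (static_cast<uint64_t>(Hi2) << 32) | Lo;
}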
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 396d22c7ec18..fee900b3efb2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -338,8 +338,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) return false; - Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); - Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); + Offset0 = Off0->getAsZExtVal(); + Offset1 = Off1->getAsZExtVal(); return true; } @@ -2475,6 +2475,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.eraseFromParent(); break; } + + case AMDGPU::S_MUL_U64_U32_PSEUDO: + case AMDGPU::S_MUL_I64_I32_PSEUDO: + MI.setDesc(get(AMDGPU::S_MUL_U64)); + break; } return true; } @@ -4153,15 +4158,15 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: - return (isInt<16>(Imm) || isUInt<16>(Imm)) && - AMDGPU::isInlinableIntLiteral((int16_t)Imm); + return AMDGPU::isInlinableLiteralV2I16(Imm); + case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + return AMDGPU::isInlinableLiteralV2F16(Imm); case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_FP16: - case AMDGPU::OPERAND_REG_INLINE_AC_FP16: - case AMDGPU::OPERAND_REG_IMM_V2FP16: - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: - case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { // A few special case instructions have 16-bit operands on subtargets // where 16-bit instructions are not legal. @@ -6845,6 +6850,21 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, // Default handling break; } + + case AMDGPU::S_MUL_U64: + // Split s_mul_u64 in 32-bit vector multiplications. + splitScalarSMulU64(Worklist, Inst, MDT); + Inst.eraseFromParent(); + return; + + case AMDGPU::S_MUL_U64_U32_PSEUDO: + case AMDGPU::S_MUL_I64_I32_PSEUDO: + // This is a special case of s_mul_u64 where all the operands are either + // zero extended or sign extended. + splitScalarSMulPseudo(Worklist, Inst, MDT); + Inst.eraseFromParent(); + return; + case AMDGPU::S_AND_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT); Inst.eraseFromParent(); @@ -7654,6 +7674,180 @@ void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } +// There is not a vector equivalent of s_mul_u64. For this reason, we need to +// split the s_mul_u64 in 32-bit vector multiplications. 
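// Illustrative sketch, not part of the patch: the scalar identity behind the
// split performed below. With a = aH:aL and b = bH:bL (32-bit halves), the
// low half of a*b is the low 32 bits of aL*bL, and the high half is
// aL*bH + aH*bL plus the carry (the high 32 bits of aL*bL); the aH*bH term
// only affects bits 64 and above and is dropped.
#include <cstdint>

static uint64_t mul64FromHalves(uint64_t A, uint64_t B) {
  uint32_t AL = static_cast<uint32_t>(A), AH = static_cast<uint32_t>(A >> 32);
  uint32_t BL = static_cast<uint32_t>(B), BH = static_cast<uint32_t>(B >> 32);
  uint64_t LoFull = static_cast<uint64_t>(AL) * BL; // V_MUL_LO/HI_U32 of the low halves
  uint32_t Lo = static_cast<uint32_t>(LoFull);
  uint32_t Carry = static_cast<uint32_t>(LoFull >> 32);
  uint32_t Hi = AL * BH   // V_MUL_LO_U32
              + AH * BL   // V_MUL_LO_U32
              + Carry;    // V_ADD_U32
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}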
+void SIInstrInfo::splitScalarSMulU64(SIInstrWorklist &Worklist, + MachineInstr &Inst, + MachineDominatorTree *MDT) const { + MachineBasicBlock &MBB = *Inst.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src0 = Inst.getOperand(1); + MachineOperand &Src1 = Inst.getOperand(2); + const DebugLoc &DL = Inst.getDebugLoc(); + MachineBasicBlock::iterator MII = Inst; + + const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg()); + const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg()); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); + if (RI.isSGPRClass(Src0SubRC)) + Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC); + const TargetRegisterClass *Src1SubRC = + RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); + if (RI.isSGPRClass(Src1SubRC)) + Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC); + + // First, we extract the low 32-bit and high 32-bit values from each of the + // operands. + MachineOperand Op0L = + buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); + MachineOperand Op1L = + buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); + MachineOperand Op0H = + buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); + MachineOperand Op1H = + buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); + + // The multilication is done as follows: + // + // Op1H Op1L + // * Op0H Op0L + // -------------------- + // Op1H*Op0L Op1L*Op0L + // + Op1H*Op0H Op1L*Op0H + // ----------------------------------------- + // (Op1H*Op0L + Op1L*Op0H + carry) Op1L*Op0L + // + // We drop Op1H*Op0H because the result of the multiplication is a 64-bit + // value and that would overflow. + // The low 32-bit value is Op1L*Op0L. + // The high 32-bit value is Op1H*Op0L + Op1L*Op0H + carry (from Op1L*Op0L). + + Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MachineInstr *Op1L_Op0H = + BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), Op1L_Op0H_Reg) + .add(Op1L) + .add(Op0H); + + Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MachineInstr *Op1H_Op0L = + BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), Op1H_Op0L_Reg) + .add(Op1H) + .add(Op0L); + + Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MachineInstr *Carry = + BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_HI_U32_e64), CarryReg) + .add(Op1L) + .add(Op0L); + + MachineInstr *LoHalf = + BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), DestSub0) + .add(Op1L) + .add(Op0L); + + Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + MachineInstr *Add = BuildMI(MBB, MII, DL, get(AMDGPU::V_ADD_U32_e32), AddReg) + .addReg(Op1L_Op0H_Reg) + .addReg(Op1H_Op0L_Reg); + + MachineInstr *HiHalf = + BuildMI(MBB, MII, DL, get(AMDGPU::V_ADD_U32_e32), DestSub1) + .addReg(AddReg) + .addReg(CarryReg); + + BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) + .addReg(DestSub0) + .addImm(AMDGPU::sub0) + .addReg(DestSub1) + .addImm(AMDGPU::sub1); + + MRI.replaceRegWith(Dest.getReg(), FullDestReg); + + // Try to legalize the operands in case we need to swap the order to keep it + // valid. 
+ legalizeOperands(*Op1L_Op0H, MDT); + legalizeOperands(*Op1H_Op0L, MDT); + legalizeOperands(*Carry, MDT); + legalizeOperands(*LoHalf, MDT); + legalizeOperands(*Add, MDT); + legalizeOperands(*HiHalf, MDT); + + // Move all users of this moved value. + addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); +} + +// Lower S_MUL_U64_U32_PSEUDO/S_MUL_I64_I32_PSEUDO in two 32-bit vector +// multiplications. +void SIInstrInfo::splitScalarSMulPseudo(SIInstrWorklist &Worklist, + MachineInstr &Inst, + MachineDominatorTree *MDT) const { + MachineBasicBlock &MBB = *Inst.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src0 = Inst.getOperand(1); + MachineOperand &Src1 = Inst.getOperand(2); + const DebugLoc &DL = Inst.getDebugLoc(); + MachineBasicBlock::iterator MII = Inst; + + const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg()); + const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg()); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); + if (RI.isSGPRClass(Src0SubRC)) + Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC); + const TargetRegisterClass *Src1SubRC = + RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); + if (RI.isSGPRClass(Src1SubRC)) + Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC); + + // First, we extract the low 32-bit and high 32-bit values from each of the + // operands. + MachineOperand Op0L = + buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); + MachineOperand Op1L = + buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC); + + unsigned Opc = Inst.getOpcode(); + unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO + ? AMDGPU::V_MUL_HI_U32_e64 + : AMDGPU::V_MUL_HI_I32_e64; + MachineInstr *HiHalf = + BuildMI(MBB, MII, DL, get(NewOpc), DestSub1).add(Op1L).add(Op0L); + + MachineInstr *LoHalf = + BuildMI(MBB, MII, DL, get(AMDGPU::V_MUL_LO_U32_e64), DestSub0) + .add(Op1L) + .add(Op0L); + + BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) + .addReg(DestSub0) + .addImm(AMDGPU::sub0) + .addReg(DestSub1) + .addImm(AMDGPU::sub1); + + MRI.replaceRegWith(Dest.getReg(), FullDestReg); + + // Try to legalize the operands in case we need to swap the order to keep it + // valid. + legalizeOperands(*HiHalf, MDT); + legalizeOperands(*LoHalf, MDT); + + // Move all users of this moved value. 
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); +} + void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst, unsigned Opcode, MachineDominatorTree *MDT) const { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 46eee6fae0a5..37ee159362a2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -138,6 +138,12 @@ private: unsigned Opcode, MachineDominatorTree *MDT = nullptr) const; + void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT) const; + + void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst, + MachineDominatorTree *MDT) const; + void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst, MachineDominatorTree *MDT = nullptr) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 173c877b8d29..f07b8fa0ea4c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -860,23 +860,6 @@ def ShiftAmt32Imm : ImmLeaf <i32, [{ return Imm < 32; }]>; -def getNegV2I16Imm : SDNodeXForm<build_vector, [{ - return SDValue(packNegConstantV2I16(N, *CurDAG), 0); -}]>; - -def NegSubInlineConstV216 : PatLeaf<(build_vector), [{ - assert(N->getNumOperands() == 2); - assert(N->getOperand(0).getValueType().getSizeInBits() == 16); - SDValue Src0 = N->getOperand(0); - SDValue Src1 = N->getOperand(1); - if (Src0 == Src1) - return isNegInlineImmediate(Src0.getNode()); - - return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) || - (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode())); -}], getNegV2I16Imm>; - - def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{ return fp16SrcZerosHighBits(N->getOpcode()); }]>; @@ -1144,6 +1127,8 @@ def exp_tgt : CustomOperand<i32, 0, "ExpTgt">; def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">; def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">; +def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">; +def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">; class KImmFPOperand<ValueType vt> : ImmOperand<vt> { let OperandNamespace = "AMDGPU"; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 8310c6b57dad..b4bd46d33c1f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -30,7 +30,7 @@ include "SMInstructions.td" include "FLATInstructions.td" include "BUFInstructions.td" include "EXPInstructions.td" -include "LDSDIRInstructions.td" +include "DSDIRInstructions.td" include "VINTERPInstructions.td" //===----------------------------------------------------------------------===// @@ -111,7 +111,6 @@ def ATOMIC_FENCE : SPseudoInstSI< [(atomic_fence (i32 timm:$ordering), (i32 timm:$scope))], "ATOMIC_FENCE $ordering, $scope"> { let hasSideEffects = 1; - let maybeAtomic = 1; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { @@ -316,6 +315,12 @@ def S_USUBO_PSEUDO : SPseudoInstSI < (outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1) >; +let OtherPredicates = [HasShaderCyclesHiLoRegisters] in +def GET_SHADERCYCLESHILO : SPseudoInstSI< + (outs SReg_64:$sdst), (ins), + [(set SReg_64:$sdst, (i64 (readcyclecounter)))] +>; + } // End usesCustomInserter = 1, Defs = [SCC] let usesCustomInserter = 1 in { @@ -557,6 +562,7 @@ def SI_MASKED_UNREACHABLE : SPseudoInstSI 
<(outs), (ins), let hasNoSchedulingInfo = 1; let FixedSize = 1; let isMeta = 1; + let maybeAtomic = 0; } // Used as an isel pseudo to directly emit initialization with an @@ -1097,7 +1103,7 @@ def : Pat < multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> { // f16_to_fp patterns def : GCNPat < - (f32 (f16_to_fp i32:$src0)), + (f32 (any_f16_to_fp i32:$src0)), (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src0) >; @@ -1122,7 +1128,7 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16 >; def : GCNPat < - (f64 (fpextend f16:$src)), + (f64 (any_fpextend f16:$src)), (V_CVT_F64_F32_e32 (cvt_f32_f16_inst_e64 SRCMODS.NONE, $src)) >; @@ -1151,6 +1157,13 @@ multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16 (f16 (uint_to_fp i32:$src)), (cvt_f16_f32_inst_e64 SRCMODS.NONE, (V_CVT_F32_U32_e32 VSrc_b32:$src)) >; + + // This is only used on targets without half support + // TODO: Introduce strict variant of AMDGPUfp_to_f16 and share custom lowering + def : GCNPat < + (i32 (strict_fp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), + (cvt_f16_f32_inst_e64 $src0_modifiers, f32:$src0) + >; } let SubtargetPredicate = NotHasTrue16BitInsts in @@ -1515,6 +1528,23 @@ def : BitConvert <v2f16, f32, SReg_32>; def : BitConvert <f32, v2f16, SReg_32>; def : BitConvert <v2i16, f32, SReg_32>; def : BitConvert <f32, v2i16, SReg_32>; +def : BitConvert <v2bf16, i32, SReg_32>; +def : BitConvert <i32, v2bf16, SReg_32>; +def : BitConvert <v2bf16, i32, VGPR_32>; +def : BitConvert <i32, v2bf16, VGPR_32>; +def : BitConvert <v2bf16, v2i16, SReg_32>; +def : BitConvert <v2i16, v2bf16, SReg_32>; +def : BitConvert <v2bf16, v2i16, VGPR_32>; +def : BitConvert <v2i16, v2bf16, VGPR_32>; +def : BitConvert <v2bf16, v2f16, SReg_32>; +def : BitConvert <v2f16, v2bf16, SReg_32>; +def : BitConvert <v2bf16, v2f16, VGPR_32>; +def : BitConvert <v2f16, v2bf16, VGPR_32>; +def : BitConvert <f32, v2bf16, VGPR_32>; +def : BitConvert <v2bf16, f32, VGPR_32>; +def : BitConvert <f32, v2bf16, SReg_32>; +def : BitConvert <v2bf16, f32, SReg_32>; + // 64-bit bitcast def : BitConvert <i64, f64, VReg_64>; @@ -1531,6 +1561,19 @@ def : BitConvert <f64, v2i32, VReg_64>; def : BitConvert <v2i32, f64, VReg_64>; def : BitConvert <v4i16, v4f16, VReg_64>; def : BitConvert <v4f16, v4i16, VReg_64>; +def : BitConvert <v4bf16, v2i32, VReg_64>; +def : BitConvert <v2i32, v4bf16, VReg_64>; +def : BitConvert <v4bf16, i64, VReg_64>; +def : BitConvert <i64, v4bf16, VReg_64>; +def : BitConvert <v4bf16, v4i16, VReg_64>; +def : BitConvert <v4i16, v4bf16, VReg_64>; +def : BitConvert <v4bf16, v4f16, VReg_64>; +def : BitConvert <v4f16, v4bf16, VReg_64>; +def : BitConvert <v4bf16, v2f32, VReg_64>; +def : BitConvert <v2f32, v4bf16, VReg_64>; +def : BitConvert <v4bf16, f64, VReg_64>; +def : BitConvert <f64, v4bf16, VReg_64>; + // FIXME: Make SGPR def : BitConvert <v2i32, v4f16, VReg_64>; @@ -1590,6 +1633,37 @@ def : BitConvert <v2f64, v8i16, SReg_128>; def : BitConvert <v2i64, v8f16, SReg_128>; def : BitConvert <v2f64, v8f16, SReg_128>; +def : BitConvert <v4i32, v8bf16, SReg_128>; +def : BitConvert <v8bf16, v4i32, SReg_128>; +def : BitConvert <v4i32, v8bf16, VReg_128>; +def : BitConvert <v8bf16, v4i32, VReg_128>; + +def : BitConvert <v4f32, v8bf16, SReg_128>; +def : BitConvert <v8bf16, v4f32, SReg_128>; +def : BitConvert <v4f32, v8bf16, VReg_128>; +def : BitConvert <v8bf16, v4f32, VReg_128>; + +def : BitConvert <v8i16, v8bf16, SReg_128>; +def : BitConvert <v8bf16, v8i16, SReg_128>; +def : 
BitConvert <v8i16, v8bf16, VReg_128>; +def : BitConvert <v8bf16, v8i16, VReg_128>; + +def : BitConvert <v8f16, v8bf16, SReg_128>; +def : BitConvert <v8bf16, v8f16, SReg_128>; +def : BitConvert <v8f16, v8bf16, VReg_128>; +def : BitConvert <v8bf16, v8f16, VReg_128>; + +def : BitConvert <v2f64, v8bf16, SReg_128>; +def : BitConvert <v8bf16, v2f64, SReg_128>; +def : BitConvert <v2f64, v8bf16, VReg_128>; +def : BitConvert <v8bf16, v2f64, VReg_128>; + +def : BitConvert <v2i64, v8bf16, SReg_128>; +def : BitConvert <v8bf16, v2i64, SReg_128>; +def : BitConvert <v2i64, v8bf16, VReg_128>; +def : BitConvert <v8bf16, v2i64, VReg_128>; + + // 160-bit bitcast def : BitConvert <v5i32, v5f32, SReg_160>; def : BitConvert <v5f32, v5i32, SReg_160>; @@ -1654,6 +1728,31 @@ def : BitConvert <v4i64, v16i16, VReg_256>; def : BitConvert <v4f64, v16f16, VReg_256>; def : BitConvert <v4f64, v16i16, VReg_256>; + +def : BitConvert <v8i32, v16bf16, VReg_256>; +def : BitConvert <v16bf16, v8i32, VReg_256>; +def : BitConvert <v8f32, v16bf16, VReg_256>; +def : BitConvert <v16bf16, v8f32, VReg_256>; +def : BitConvert <v4i64, v16bf16, VReg_256>; +def : BitConvert <v16bf16, v4i64, VReg_256>; +def : BitConvert <v4f64, v16bf16, VReg_256>; +def : BitConvert <v16bf16, v4f64, VReg_256>; + + + +def : BitConvert <v16i16, v16bf16, SReg_256>; +def : BitConvert <v16bf16, v16i16, SReg_256>; +def : BitConvert <v16i16, v16bf16, VReg_256>; +def : BitConvert <v16bf16, v16i16, VReg_256>; + +def : BitConvert <v16f16, v16bf16, SReg_256>; +def : BitConvert <v16bf16, v16f16, SReg_256>; +def : BitConvert <v16f16, v16bf16, VReg_256>; +def : BitConvert <v16bf16, v16f16, VReg_256>; + + + + // 288-bit bitcast def : BitConvert <v9i32, v9f32, SReg_288>; def : BitConvert <v9f32, v9i32, SReg_288>; @@ -1702,6 +1801,38 @@ def : BitConvert <v8f64, v16f32, VReg_512>; def : BitConvert <v16f32, v8i64, VReg_512>; def : BitConvert <v16f32, v8f64, VReg_512>; + + +def : BitConvert <v32bf16, v32i16, VReg_512>; +def : BitConvert <v32i16, v32bf16, VReg_512>; +def : BitConvert <v32bf16, v32i16, SReg_512>; +def : BitConvert <v32i16, v32bf16, SReg_512>; + +def : BitConvert <v32bf16, v32f16, VReg_512>; +def : BitConvert <v32f16, v32bf16, VReg_512>; +def : BitConvert <v32bf16, v32f16, SReg_512>; +def : BitConvert <v32f16, v32bf16, SReg_512>; + +def : BitConvert <v32bf16, v16i32, VReg_512>; +def : BitConvert <v16i32, v32bf16, VReg_512>; +def : BitConvert <v32bf16, v16i32, SReg_512>; +def : BitConvert <v16i32, v32bf16, SReg_512>; + +def : BitConvert <v32bf16, v16f32, VReg_512>; +def : BitConvert <v16f32, v32bf16, VReg_512>; +def : BitConvert <v32bf16, v16f32, SReg_512>; +def : BitConvert <v16f32, v32bf16, SReg_512>; + +def : BitConvert <v32bf16, v8f64, VReg_512>; +def : BitConvert <v8f64, v32bf16, VReg_512>; +def : BitConvert <v32bf16, v8f64, SReg_512>; +def : BitConvert <v8f64, v32bf16, SReg_512>; + +def : BitConvert <v32bf16, v8i64, VReg_512>; +def : BitConvert <v8i64, v32bf16, VReg_512>; +def : BitConvert <v32bf16, v8i64, SReg_512>; +def : BitConvert <v8i64, v32bf16, SReg_512>; + // 1024-bit bitcast def : BitConvert <v32i32, v32f32, VReg_1024>; def : BitConvert <v32f32, v32i32, VReg_1024>; @@ -1958,19 +2089,21 @@ def : GCNPat < let SubtargetPredicate = HasPackedFP32Ops; } +foreach fp16vt = [f16, bf16] in { + def : GCNPat < - (fcopysign f16:$src0, f16:$src1), + (fcopysign fp16vt:$src0, fp16vt:$src1), (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) >; def : GCNPat < - (fcopysign f32:$src0, f16:$src1), + (fcopysign f32:$src0, fp16vt:$src1), (V_BFI_B32_e64 (S_MOV_B32 
(i32 0x7fffffff)), $src0, (V_LSHLREV_B32_e64 (i32 16), $src1)) >; def : GCNPat < - (fcopysign f64:$src0, f16:$src1), + (fcopysign f64:$src0, fp16vt:$src1), (REG_SEQUENCE SReg_64, (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, (V_BFI_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)), @@ -1978,16 +2111,17 @@ def : GCNPat < >; def : GCNPat < - (fcopysign f16:$src0, f32:$src1), + (fcopysign fp16vt:$src0, f32:$src1), (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, (V_LSHRREV_B32_e64 (i32 16), $src1)) >; def : GCNPat < - (fcopysign f16:$src0, f64:$src1), + (fcopysign fp16vt:$src0, f64:$src1), (V_BFI_B32_e64 (S_MOV_B32 (i32 0x00007fff)), $src0, (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1))) >; +} // End foreach fp16vt = [f16, bf16] /********** ================== **********/ /********** Immediate Patterns **********/ @@ -2026,6 +2160,11 @@ def : GCNPat < (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) >; +def : GCNPat < + (VGPRImm<(bf16 fpimm)>:$imm), + (V_MOV_B32_e32 (bf16 (bitcast_fpimm_to_i32 $imm))) +>; + // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit // immediate and wil be expanded as needed, but we will only use these patterns // for values which can be encoded. @@ -2060,6 +2199,11 @@ def : GCNPat < >; def : GCNPat < + (bf16 fpimm:$imm), + (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) +>; + +def : GCNPat < (p5 frameindex:$fi), (V_MOV_B32_e32 (p5 (frameindex_to_targetframeindex $fi))) >; @@ -3741,6 +3885,18 @@ def G_AMDGPU_S_BUFFER_LOAD : AMDGPUGenericInstruction { let mayStore = 0; } +def G_AMDGPU_S_MUL_U64_U32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1); + let hasSideEffects = 0; +} + +def G_AMDGPU_S_MUL_I64_I32 : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1); + let hasSideEffects = 0; +} + // This is equivalent to the G_INTRINSIC*, but the operands may have // been legalized depending on the subtarget requirements. 
def G_AMDGPU_INTRIN_IMAGE_LOAD : AMDGPUGenericInstruction { diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 10ec54d3317f..6d749ad1ad24 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -578,6 +578,14 @@ public: bool IsNonTemporal) const override; }; +class SIGfx12CacheControl : public SIGfx11CacheControl { +public: + SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {} + + bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, Position Pos) const override; +}; + class SIMemoryLegalizer final : public MachineFunctionPass { private: @@ -857,7 +865,9 @@ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) { return std::make_unique<SIGfx7CacheControl>(ST); if (Generation < AMDGPUSubtarget::GFX11) return std::make_unique<SIGfx10CacheControl>(ST); - return std::make_unique<SIGfx11CacheControl>(ST); + if (Generation < AMDGPUSubtarget::GFX12) + return std::make_unique<SIGfx11CacheControl>(ST); + return std::make_unique<SIGfx12CacheControl>(ST); } bool SIGfx6CacheControl::enableLoadCacheBypass( @@ -1423,7 +1433,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI, bool Changed = false; MachineBasicBlock &MBB = *MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); + const DebugLoc &DL = MI->getDebugLoc(); if (Pos == Position::AFTER) ++MI; @@ -2132,6 +2142,62 @@ bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal( return Changed; } +bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, + SIAtomicScope Scope, + SIAtomicAddrSpace AddrSpace, + Position Pos) const { + if (!InsertCacheInv) + return false; + + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + /// The scratch address space does not need the global memory cache + /// to be flushed as all memory operations by the same thread are + /// sequentially consistent, and no other thread can access scratch + /// memory. + + /// Other address spaces do not have a cache. + if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE) + return false; + + AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV; + switch (Scope) { + case SIAtomicScope::SYSTEM: + ScopeImm = AMDGPU::CPol::SCOPE_SYS; + break; + case SIAtomicScope::AGENT: + ScopeImm = AMDGPU::CPol::SCOPE_DEV; + break; + case SIAtomicScope::WORKGROUP: + // In WGP mode the waves of a work-group can be executing on either CU of + // the WGP. Therefore we need to invalidate the L0 which is per CU. + // Otherwise in CU mode all waves of a work-group are on the same CU, and so + // the L0 does not need to be invalidated. + if (ST.isCuModeEnabled()) + return false; + + ScopeImm = AMDGPU::CPol::SCOPE_SE; + break; + case SIAtomicScope::WAVEFRONT: + case SIAtomicScope::SINGLETHREAD: + // No cache to invalidate. 
+ return false; + default: + llvm_unreachable("Unsupported synchronization scope"); + } + + if (Pos == Position::AFTER) + ++MI; + + BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm); + + if (Pos == Position::AFTER) + --MI; + + return true; +} + bool SIMemoryLegalizer::removeAtomicPseudoMIs() { if (AtomicPseudoMIs.empty()) return false; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index c94b894c5841..f42af89cf5e6 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -916,7 +916,7 @@ defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>; defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>; let GlobalPriority = true in { -defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512Regs, TTMP_512Regs>; +defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], SGPR_512Regs, TTMP_512Regs>; defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>; } @@ -970,7 +970,7 @@ defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>; defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>; let GlobalPriority = true in { -defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], (add VGPR_512)>; +defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], (add VGPR_512)>; defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>; } @@ -1152,11 +1152,11 @@ class RegOrF32 <string RegisterClass, string OperandTypePrefix> class RegOrV2B16 <string RegisterClass, string OperandTypePrefix> : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2INT16", - !subst("_v2b16", "V2B16", NAME), "_Imm16">; + !subst("_v2b16", "V2B16", NAME), "_ImmV2I16">; class RegOrV2F16 <string RegisterClass, string OperandTypePrefix> : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2FP16", - !subst("_v2f16", "V2F16", NAME), "_Imm16">; + !subst("_v2f16", "V2F16", NAME), "_ImmV2F16">; class RegOrF64 <string RegisterClass, string OperandTypePrefix> : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP64", diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 59d6ccf513bb..5e6c34992930 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -553,7 +553,9 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, } continue; } else if (Opcode == AMDGPU::LDS_PARAM_LOAD || - Opcode == AMDGPU::LDS_DIRECT_LOAD) { + Opcode == AMDGPU::DS_PARAM_LOAD || + Opcode == AMDGPU::LDS_DIRECT_LOAD || + Opcode == AMDGPU::DS_DIRECT_LOAD) { // Mark these STRICTWQM, but only for the instruction, not its operands. // This avoid unnecessarily marking M0 as requiring WQM. 
InstrInfo &II = Instructions[&MI]; diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 3297847b0360..fc29ce8d71f2 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -29,6 +29,7 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt let mayStore = 0; let mayLoad = 1; let hasSideEffects = 0; + let maybeAtomic = 0; let UseNamedOperandTable = 1; let SchedRW = [WriteSMEM]; @@ -305,6 +306,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>; defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>; defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>; +defm S_LOAD_I8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>; +defm S_LOAD_U8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>; +defm S_LOAD_I16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>; +defm S_LOAD_U16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>; let is_buffer = 1 in { defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>; @@ -316,6 +321,10 @@ let SubtargetPredicate = HasScalarDwordx3Loads in defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>; defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>; defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>; +defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>; +defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>; +defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>; +defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>; } let SubtargetPredicate = HasScalarStores in { @@ -977,20 +986,35 @@ def : GCNPat < } } // let OtherPredicates = [HasShaderCyclesRegister] -multiclass SMPrefetchPat<string type, int cache_type> { +def i32imm_zero : TImmLeaf <i32, [{ + return Imm == 0; +}]>; + +def i32imm_one : TImmLeaf <i32, [{ + return Imm == 1; +}]>; + +multiclass SMPrefetchPat<string type, TImmLeaf cache_type> { def : GCNPat < - (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, (i32 cache_type)), + (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type), (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0)) >; def : GCNPat < - (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, (i32 cache_type)), + (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type), (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0)) >; + + def : GCNPat < + (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type), + (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) + (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)), + 0, (i32 SGPR_NULL), (i8 0)) + >; } -defm : SMPrefetchPat<"INST", 0>; -defm : SMPrefetchPat<"DATA", 1>; +defm : SMPrefetchPat<"INST", i32imm_zero>; +defm : SMPrefetchPat<"DATA", i32imm_one>; //===----------------------------------------------------------------------===// // GFX10. 
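The S_LOAD_I8/U8/I16/U16 and S_BUFFER_LOAD_I8/U8/I16/U16 pseudos added in the hunk above all produce a single 32-bit SGPR result; the I- and U-flavoured forms differ only in how the sub-dword value is extended to 32 bits. A minimal C++ sketch of that extension semantic, for orientation only (extendSubDword is an illustrative helper, not code from this import):

#include <cstdint>

// S_LOAD_I8 / S_LOAD_I16 sign-extend the loaded byte or short into the
// destination SGPR; S_LOAD_U8 / S_LOAD_U16 zero-extend it.
static uint32_t extendSubDword(uint32_t Raw, unsigned Bits, bool IsSigned) {
  uint32_t Mask = (Bits == 8) ? 0xffu : 0xffffu;
  uint32_t Val = Raw & Mask;
  if (!IsSigned)
    return Val;                      // U8 / U16: zero-extend
  uint32_t SignBit = 1u << (Bits - 1);
  return (Val ^ SignBit) - SignBit;  // I8 / I16: sign-extend
}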
@@ -1321,6 +1345,11 @@ defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">; defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">; defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">; +defm S_LOAD_I8 : SM_Real_Loads_gfx12<0x08>; +defm S_LOAD_U8 : SM_Real_Loads_gfx12<0x09>; +defm S_LOAD_I16 : SM_Real_Loads_gfx12<0x0a>; +defm S_LOAD_U16 : SM_Real_Loads_gfx12<0x0b>; + defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">; defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">; defm S_BUFFER_LOAD_B96 : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">; @@ -1328,6 +1357,11 @@ defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">; defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">; defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">; +defm S_BUFFER_LOAD_I8 : SM_Real_Loads_gfx12<0x18>; +defm S_BUFFER_LOAD_U8 : SM_Real_Loads_gfx12<0x19>; +defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx12<0x1a>; +defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx12<0x1b>; + def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>; def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index c9687ac368d3..46fa3d57a21c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -673,6 +673,16 @@ let SubtargetPredicate = isGFX12Plus in { let isCommutable = 1; } + // The higher 32-bits of the inputs contain the sign extension bits. + def S_MUL_I64_I32_PSEUDO : SPseudoInstSI < + (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) + >; + + // The higher 32-bits of the inputs are zero. 
+ def S_MUL_U64_U32_PSEUDO : SPseudoInstSI < + (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) + >; + } // End SubtargetPredicate = isGFX12Plus let Uses = [SCC] in { @@ -1186,14 +1196,12 @@ let SubtargetPredicate = isGFX10Plus in { let SubtargetPredicate = isGFX10GFX11 in { def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">; def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">; -} // End SubtargetPredicate = isGFX10GFX11 -let SubtargetPredicate = isGFX10Plus in { def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">; def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">; def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">; def S_WAITCNT_LGKMCNT : SOPK_WAITCNT<"s_waitcnt_lgkmcnt">; -} // End SubtargetPredicate = isGFX10Plus +} // End SubtargetPredicate = isGFX10GFX11 //===----------------------------------------------------------------------===// // SOPC Instructions @@ -1702,6 +1710,27 @@ let SubtargetPredicate = HasVGPRSingleUseHintInsts in { SOPP_Pseudo<"s_singleuse_vdst", (ins s16imm:$simm16), "$simm16">; } // End SubtargetPredicate = HasVGPRSingeUseHintInsts +let SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 in { + def S_WAIT_LOADCNT : + SOPP_Pseudo<"s_wait_loadcnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_LOADCNT_DSCNT : + SOPP_Pseudo<"s_wait_loadcnt_dscnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_STORECNT : + SOPP_Pseudo<"s_wait_storecnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_STORECNT_DSCNT : + SOPP_Pseudo<"s_wait_storecnt_dscnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_SAMPLECNT : + SOPP_Pseudo<"s_wait_samplecnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_BVHCNT : + SOPP_Pseudo<"s_wait_bvhcnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_EXPCNT : + SOPP_Pseudo<"s_wait_expcnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_DSCNT : + SOPP_Pseudo<"s_wait_dscnt", (ins s16imm:$simm16), "$simm16">; + def S_WAIT_KMCNT : + SOPP_Pseudo<"s_wait_kmcnt", (ins s16imm:$simm16), "$simm16">; +} // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 + //===----------------------------------------------------------------------===// // SOP1 Patterns //===----------------------------------------------------------------------===// @@ -2411,10 +2440,10 @@ defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx11_gfx12<0x013>; defm S_CALL_B64 : SOPK_Real32_gfx11_gfx12<0x014>; defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx11<0x016>; defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx11<0x017>; -defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11_gfx12<0x018>; -defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11_gfx12<0x019>; -defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11_gfx12<0x01a>; -defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11_gfx12<0x01b>; +defm S_WAITCNT_VSCNT : SOPK_Real32_gfx11<0x018>; +defm S_WAITCNT_VMCNT : SOPK_Real32_gfx11<0x019>; +defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx11<0x01a>; +defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>; //===----------------------------------------------------------------------===// // SOPK - GFX10. 
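The two multiply pseudos defined above (S_MUL_U64_U32_PSEUDO and S_MUL_I64_I32_PSEUDO) encode an operand contract rather than new arithmetic: per their comments, by the time such a pseudo is formed, the upper 32 bits of both 64-bit sources are all zero (unsigned form) or copies of the sign bit (signed form), so the full 64-bit product is already determined by the low halves. A small C++ sketch of that invariant, illustrative only (the helper names are not from this import):

#include <cassert>
#include <cstdint>

// What S_MUL_U64_U32_PSEUDO may assume about its operands.
static uint64_t mulU64FromU32(uint64_t A, uint64_t B) {
  assert((A >> 32) == 0 && (B >> 32) == 0 && "high halves must be zero");
  return (uint64_t)(uint32_t)A * (uint32_t)B; // full 64-bit product
}

// What S_MUL_I64_I32_PSEUDO may assume: the high words are sign extensions.
static int64_t mulI64FromI32(int64_t A, int64_t B) {
  assert(A == (int32_t)A && B == (int32_t)B && "high words hold sign bits");
  return (int64_t)(int32_t)A * (int64_t)(int32_t)B; // full 64-bit product
}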
@@ -2516,6 +2545,15 @@ multiclass SOPP_Real_32_Renamed_gfx12<bits<7> op, SOPP_Pseudo backing_pseudo, st defm S_WAIT_ALU : SOPP_Real_32_Renamed_gfx12<0x008, S_WAITCNT_DEPCTR, "s_wait_alu">; defm S_BARRIER_WAIT : SOPP_Real_32_gfx12<0x014>; defm S_BARRIER_LEAVE : SOPP_Real_32_gfx12<0x015>; +defm S_WAIT_LOADCNT : SOPP_Real_32_gfx12<0x040>; +defm S_WAIT_STORECNT : SOPP_Real_32_gfx12<0x041>; +defm S_WAIT_SAMPLECNT : SOPP_Real_32_gfx12<0x042>; +defm S_WAIT_BVHCNT : SOPP_Real_32_gfx12<0x043>; +defm S_WAIT_EXPCNT : SOPP_Real_32_gfx12<0x044>; +defm S_WAIT_DSCNT : SOPP_Real_32_gfx12<0x046>; +defm S_WAIT_KMCNT : SOPP_Real_32_gfx12<0x047>; +defm S_WAIT_LOADCNT_DSCNT : SOPP_Real_32_gfx12<0x048>; +defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>; //===----------------------------------------------------------------------===// // SOPP - GFX11, GFX12. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index a91d77175234..26ba2575ff34 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2506,53 +2506,95 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { Val == 0x3118; // 1/2pi } -bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { - assert(HasInv2Pi); - - if (isInt<16>(Literal) || isUInt<16>(Literal)) { - int16_t Trunc = static_cast<int16_t>(Literal); - return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi); +std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { + // Unfortunately, the Instruction Set Architecture Reference Guide is + // misleading about how the inline operands work for (packed) 16-bit + // instructions. In a nutshell, the actual HW behavior is: + // + // - integer encodings (-16 .. 
64) are always produced as sign-extended + // 32-bit values + // - float encodings are produced as: + // - for F16 instructions: corresponding half-precision float values in + // the LSBs, 0 in the MSBs + // - for UI16 instructions: corresponding single-precision float value + int32_t Signed = static_cast<int32_t>(Literal); + if (Signed >= 0 && Signed <= 64) + return 128 + Signed; + + if (Signed >= -16 && Signed <= -1) + return 192 + std::abs(Signed); + + if (IsFloat) { + // clang-format off + switch (Literal) { + case 0x3800: return 240; // 0.5 + case 0xB800: return 241; // -0.5 + case 0x3C00: return 242; // 1.0 + case 0xBC00: return 243; // -1.0 + case 0x4000: return 244; // 2.0 + case 0xC000: return 245; // -2.0 + case 0x4400: return 246; // 4.0 + case 0xC400: return 247; // -4.0 + case 0x3118: return 248; // 1.0 / (2.0 * pi) + default: break; + } + // clang-format on + } else { + // clang-format off + switch (Literal) { + case 0x3F000000: return 240; // 0.5 + case 0xBF000000: return 241; // -0.5 + case 0x3F800000: return 242; // 1.0 + case 0xBF800000: return 243; // -1.0 + case 0x40000000: return 244; // 2.0 + case 0xC0000000: return 245; // -2.0 + case 0x40800000: return 246; // 4.0 + case 0xC0800000: return 247; // -4.0 + case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) + default: break; + } + // clang-format on } - if (!(Literal & 0xffff)) - return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi); - int16_t Lo16 = static_cast<int16_t>(Literal); - int16_t Hi16 = static_cast<int16_t>(Literal >> 16); - return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); + return {}; } -bool isInlinableIntLiteralV216(int32_t Literal) { - int16_t Lo16 = static_cast<int16_t>(Literal); - if (isInt<16>(Literal) || isUInt<16>(Literal)) - return isInlinableIntLiteral(Lo16); +// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction +// or nullopt. +std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { + return getInlineEncodingV216(false, Literal); +} - int16_t Hi16 = static_cast<int16_t>(Literal >> 16); - if (!(Literal & 0xffff)) - return isInlinableIntLiteral(Hi16); - return Lo16 == Hi16 && isInlinableIntLiteral(Lo16); +// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction +// or nullopt. +std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { + return getInlineEncodingV216(true, Literal); } -bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi, uint8_t OpType) { +// Whether the given literal can be inlined for a V_PK_* instruction. +bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { switch (OpType) { + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + return getInlineEncodingV216(false, Literal).has_value(); case AMDGPU::OPERAND_REG_IMM_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: - return isInlinableLiteralV216(Literal, HasInv2Pi); + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: + return getInlineEncodingV216(true, Literal).has_value(); default: - return isInlinableIntLiteralV216(Literal); + llvm_unreachable("bad packed operand type"); } } -bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) { - assert(HasInv2Pi); - - int16_t Lo16 = static_cast<int16_t>(Literal); - if (isInt<16>(Literal) || isUInt<16>(Literal)) - return true; +// Whether the given literal can be inlined for a V_PK_*_IU16 instruction. 
+bool isInlinableLiteralV2I16(uint32_t Literal) { + return getInlineEncodingV2I16(Literal).has_value(); +} - int16_t Hi16 = static_cast<int16_t>(Literal >> 16); - if (!(Literal & 0xffff)) - return true; - return Lo16 == Hi16; +// Whether the given literal can be inlined for a V_PK_*_F16 instruction. +bool isInlinableLiteralV2F16(uint32_t Literal) { + return getInlineEncodingV2F16(Literal).has_value(); } bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 3c9f330cbcde..50c741760d71 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1291,16 +1291,19 @@ LLVM_READNONE bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); LLVM_READNONE -bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); +std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal); LLVM_READNONE -bool isInlinableIntLiteralV216(int32_t Literal); +std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal); LLVM_READNONE -bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi, uint8_t OpType); +bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType); LLVM_READNONE -bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi); +bool isInlinableLiteralV2I16(uint32_t Literal); + +LLVM_READNONE +bool isInlinableLiteralV2F16(uint32_t Literal); LLVM_READNONE bool isValid32BitLiteral(uint64_t Val, bool IsFP64); diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 0aa62ea77b11..ecee61daa1c8 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1300,7 +1300,7 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps, let OtherPredicates = ps.OtherPredicates; } - + class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : VOP2_DPP8<op, ps, p> { diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 7f52501b5d90..e9d6f67aee16 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -125,15 +125,6 @@ defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3P_Profile<VOP_V2I16_V2 let SubtargetPredicate = HasVOP3PInsts in { -// Undo sub x, c -> add x, -c canonicalization since c is more likely -// an inline immediate than -c. -// The constant will be emitted as a mov, and folded later. -// TODO: We could directly encode the immediate now -def : GCNPat< - (add (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), NegSubInlineConstV216:$src1), - (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1) ->; - // Integer operations with clamp bit set. class VOP3PSatPat<SDPatternOperator pat, Instruction inst> : GCNPat< (pat (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), @@ -632,12 +623,12 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node, // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported. 
let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in { def _e64 : MAIInst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), - !if(NoDstOverlap, null_frag, AgprMAIFrag<node>)>, + !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node>)>, MFMATable<0, NAME # "_e64">; let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"), - !if(NoDstOverlap, null_frag, VgprMAIFrag<node>)>, + !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node>)>, MFMATable<0, NAME # "_vgprcd_e64">; } @@ -645,12 +636,13 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node, let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""), isConvertibleToThreeAddress = NoDstOverlap, Mnemonic = OpName in { - def "_mac_e64" : MAIInst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P), AgprMAIFrag<node>>, + def "_mac_e64" : MAIInst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P), + !if(!eq(node, null_frag), null_frag, AgprMAIFrag<node>)>, MFMATable<1, NAME # "_e64">; let SubtargetPredicate = isGFX90APlus in def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"), - VgprMAIFrag<node>>, + !if(!eq(node, null_frag), null_frag, VgprMAIFrag<node>)>, MFMATable<1, NAME # "_vgprcd_e64">; } } diff --git a/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp b/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp index 28e35f8f2a54..17c2d7bb13b4 100644 --- a/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp @@ -170,7 +170,7 @@ bool ARCDAGToDAGISel::SelectFrameADDR_ri(SDValue Addr, SDValue &Base, void ARCDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { case ISD::Constant: { - uint64_t CVal = cast<ConstantSDNode>(N)->getZExtValue(); + uint64_t CVal = N->getAsZExtVal(); ReplaceNode(N, CurDAG->getMachineNode( isInt<12>(CVal) ? ARC::MOV_rs12 : ARC::MOV_rlimm, SDLoc(N), MVT::i32, diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp index 2265f5db6737..5dd343d97b80 100644 --- a/llvm/lib/Target/ARC/ARCISelLowering.cpp +++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp @@ -174,6 +174,8 @@ ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM, setOperationAction(ISD::READCYCLECOUNTER, MVT::i32, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isTypeLegal(MVT::i64) ? 
Legal : Custom); + + setMaxAtomicSizeInBitsSupported(0); } const char *ARCTargetLowering::getTargetNodeName(unsigned Opcode) const { diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp index d4ae3255b32a..4f612ae623b9 100644 --- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp +++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp @@ -57,6 +57,7 @@ public: return getTM<ARCTargetMachine>(); } + void addIRPasses() override; bool addInstSelector() override; void addPreEmitPass() override; void addPreRegAlloc() override; @@ -68,6 +69,12 @@ TargetPassConfig *ARCTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARCPassConfig(*this, PM); } +void ARCPassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + + TargetPassConfig::addIRPasses(); +} + bool ARCPassConfig::addInstSelector() { addPass(createARCISelDag(getARCTargetMachine(), getOptLevel())); return false; diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 1d6aaeb7433b..cb3a709f7003 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -747,7 +747,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = GTI.getSequentialElementStride(DL); while (true) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index adc429b61bbc..e99ee299412a 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -372,7 +372,7 @@ INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) /// operand. If so Imm will receive the 32-bit value. 
static bool isInt32Immediate(SDNode *N, unsigned &Imm) { if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { - Imm = cast<ConstantSDNode>(N)->getZExtValue(); + Imm = N->getAsZExtVal(); return true; } return false; @@ -1101,8 +1101,7 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { Offset = N.getOperand(0); SDValue N1 = N.getOperand(1); - Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), - SDLoc(N), MVT::i32); + Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32); return true; } @@ -1942,7 +1941,7 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, if (!is64BitVector && NumVecs < 3) NumRegs *= 2; - unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); + unsigned Alignment = Align->getAsZExtVal(); if (Alignment >= 32 && NumRegs == 4) Alignment = 32; else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) @@ -2428,7 +2427,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, unsigned Alignment = 0; if (NumVecs != 3) { - Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); + Alignment = Align->getAsZExtVal(); unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; if (Alignment > NumBytes) Alignment = NumBytes; @@ -2871,7 +2870,7 @@ void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, Ops.push_back(N->getOperand(OpIdx++)); // limit SDValue ImmOp = N->getOperand(OpIdx++); // step - int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue(); + int ImmValue = ImmOp->getAsZExtVal(); Ops.push_back(getI32Imm(ImmValue, Loc)); if (Predicated) @@ -2892,7 +2891,7 @@ void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, // Convert and append the immediate operand designating the coprocessor. SDValue ImmCorpoc = N->getOperand(OpIdx++); - uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue(); + uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal(); Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); // For accumulating variants copy the low and high order parts of the @@ -2911,7 +2910,7 @@ void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, // Convert and append the immediate operand SDValue Imm = N->getOperand(OpIdx); - uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue(); + uint32_t ImmVal = Imm->getAsZExtVal(); Ops.push_back(getI32Imm(ImmVal, Loc)); // Accumulating variants are IT-predicable, add predicate operands. @@ -2965,7 +2964,7 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, unsigned Alignment = 0; if (NumVecs != 3) { - Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); + Alignment = Align->getAsZExtVal(); unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; if (Alignment > NumBytes) Alignment = NumBytes; @@ -3697,7 +3696,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. 
break; case ISD::Constant: { - unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); + unsigned Val = N->getAsZExtVal(); // If we can't materialize the constant we need to use a literal pool if (ConstantMaterializationCost(Val, Subtarget) > 2 && !Subtarget->genExecuteOnly()) { @@ -4132,7 +4131,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); - unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); + unsigned CC = (unsigned)N2->getAsZExtVal(); if (InGlue.getOpcode() == ARMISD::CMPZ) { if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { @@ -4243,8 +4242,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { if (SwitchEQNEToPLMI) { SDValue ARMcc = N->getOperand(2); - ARMCC::CondCodes CC = - (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); switch (CC) { default: llvm_unreachable("CMPZ must be either NE or EQ!"); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9f3bcffc7a99..568085bd0ab3 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -4820,8 +4820,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, // some tweaks to the heuristics for the previous and->shift transform. // FIXME: Optimize cases where the LHS isn't a shift. if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL && - isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U && + isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U && CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) && LHS.getConstantOperandVal(1) < 31) { unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1; @@ -5533,7 +5532,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); // Choose GE over PL, which vsel does now support - if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL) + if (ARMcc->getAsZExtVal() == ARMCC::PL) ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32); return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } @@ -7749,7 +7748,7 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, uint64_t Val; if (!isa<ConstantSDNode>(N)) return SDValue(); - Val = cast<ConstantSDNode>(N)->getZExtValue(); + Val = N->getAsZExtVal(); if (ST->isThumb1Only()) { if (Val <= 255 || ~Val <= 255) @@ -7804,7 +7803,7 @@ static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, SDValue V = Op.getOperand(i); if (!isa<ConstantSDNode>(V) && !V.isUndef()) continue; - bool BitSet = V.isUndef() ? false : cast<ConstantSDNode>(V)->getZExtValue(); + bool BitSet = V.isUndef() ? 
false : V->getAsZExtVal(); if (BitSet) Bits32 |= BoolMask << (i * BitsPerBool); } @@ -9240,7 +9239,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, EVT VT = Op.getValueType(); EVT Op1VT = V1.getValueType(); unsigned NumElts = VT.getVectorNumElements(); - unsigned Index = cast<ConstantSDNode>(V2)->getZExtValue(); + unsigned Index = V2->getAsZExtVal(); assert(VT.getScalarSizeInBits() == 1 && "Unexpected custom EXTRACT_SUBVECTOR lowering"); @@ -14618,7 +14617,7 @@ static SDValue PerformORCombineToBFI(SDNode *N, // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask // where lsb(mask) == #shamt and masked bits of B are known zero. SDValue ShAmt = N00.getOperand(1); - unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); + unsigned ShAmtC = ShAmt->getAsZExtVal(); unsigned LSB = llvm::countr_zero(Mask); if (ShAmtC != LSB) return SDValue(); @@ -18339,8 +18338,7 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { SDValue Chain = N->getOperand(0); SDValue BB = N->getOperand(1); SDValue ARMcc = N->getOperand(2); - ARMCC::CondCodes CC = - (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0)) // -> (brcond Chain BB CC CPSR Cmp) @@ -18373,8 +18371,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { SDValue FalseVal = N->getOperand(0); SDValue TrueVal = N->getOperand(1); SDValue ARMcc = N->getOperand(2); - ARMCC::CondCodes CC = - (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); + ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); // BFI is only available on V6T2+. if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) { diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 3ffde86ce1bb..abea0fef5cdc 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -362,8 +362,8 @@ ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, llvm_unreachable("Unsupported size for FCmp predicate"); } -bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { using namespace TargetOpcode; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; @@ -392,7 +392,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, OriginalResult}; auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy, 0}, {{MI.getOperand(1).getReg(), ArgTy, 0}, - {MI.getOperand(2).getReg(), ArgTy, 0}}); + {MI.getOperand(2).getReg(), ArgTy, 0}}, + LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; break; @@ -428,7 +429,8 @@ bool ARMLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, auto Status = createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy, 0}, {{MI.getOperand(2).getReg(), ArgTy, 0}, - {MI.getOperand(3).getReg(), ArgTy, 0}}); + {MI.getOperand(3).getReg(), ArgTy, 0}}, + LocObserver, &MI); if (Status != LegalizerHelper::Legalized) return false; diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/llvm/lib/Target/ARM/ARMLegalizerInfo.h index f1c2e9c94336..d6ce4eb1055b 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.h +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.h @@ -23,12 +23,12 @@ namespace llvm { class ARMSubtarget; -/// This class provides the information 
for the target register banks. class ARMLegalizerInfo : public LegalizerInfo { public: ARMLegalizerInfo(const ARMSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; private: void setFCmpLibcallsGNU(); diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index d36bfb188ed3..f91e77adb8f8 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -660,7 +660,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, SDValue Cmp; if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) { - uint64_t Imm = cast<ConstantSDNode>(RHS)->getZExtValue(); + uint64_t Imm = RHS->getAsZExtVal(); // Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero // register for the constant RHS if its lower or higher byte is zero. SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS, @@ -680,7 +680,7 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, } else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) { // Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero // register for the constant LHS if its lower or higher byte is zero. - uint64_t Imm = cast<ConstantSDNode>(LHS)->getZExtValue(); + uint64_t Imm = LHS->getAsZExtVal(); SDValue LHSlo = (Imm & 0xff) == 0 ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8) : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS, diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index 2fe86e75ddae..4d8ace7c1ece 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -151,6 +151,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, } setBooleanContents(ZeroOrOneBooleanContent); + setMaxAtomicSizeInBitsSupported(64); // Function alignments setMinFunctionAlignment(Align(8)); diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index ab0db576f7f7..8a6e7ae3663e 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -108,7 +108,8 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { return new BPFPassConfig(*this, PM); } -void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void BPFTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, FunctionPassManager &FPM, ArrayRef<PassBuilder::PipelineElement>) { @@ -148,7 +149,9 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { } void BPFPassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); addPass(createBPFCheckAndAdjustIR()); + TargetPassConfig::addIRPasses(); } diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h index 4e6adc722e76..0a28394463b2 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -42,7 +42,8 @@ public: return TLOF.get(); } - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; }; } diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h index 8ffa1d7cd9b3..bce41160b95e 100644 
--- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h +++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h @@ -36,6 +36,7 @@ class DXILResourcePrinterPass : public PassInfoMixin<DXILResourcePrinterPass> { public: explicit DXILResourcePrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; /// The legacy pass manager's analysis pass to compute DXIL resource diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index d5cb488f2fde..06938f8c74f1 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -100,7 +100,8 @@ DirectXTargetMachine::DirectXTargetMachine(const Target &T, const Triple &TT, DirectXTargetMachine::~DirectXTargetMachine() {} -void DirectXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void DirectXTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, ModulePassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.h b/llvm/lib/Target/DirectX/DirectXTargetMachine.h index d04c375b2736..428beaf61cd0 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.h +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.h @@ -47,7 +47,8 @@ public: } TargetTransformInfo getTargetTransformInfo(const Function &F) const override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; }; } // namespace llvm diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index eb5c59672224..defb1f7324f4 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -743,7 +743,7 @@ void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { // void HexagonDAGToDAGISel::SelectConstant(SDNode *N) { if (N->getValueType(0) == MVT::i1) { - assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1)); + assert(!(N->getAsZExtVal() >> 1)); unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0) ? Hexagon::PS_true : Hexagon::PS_false; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 665e2d79c83d..81035849491b 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1256,7 +1256,7 @@ HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); unsigned HwLen = Subtarget.getVectorLength(); - unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); + unsigned Idx = IdxV.getNode()->getAsZExtVal(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); @@ -1299,7 +1299,7 @@ HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); // IdxV is required to be a constant. 
- unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); + unsigned Idx = IdxV.getNode()->getAsZExtVal(); unsigned ResLen = ResTy.getVectorNumElements(); unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); @@ -1801,7 +1801,7 @@ HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) MVT SrcTy = ty(SrcV); MVT DstTy = ty(Op); SDValue IdxV = Op.getOperand(1); - unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); + unsigned Idx = IdxV.getNode()->getAsZExtVal(); assert(Idx % DstTy.getVectorNumElements() == 0); (void)Idx; const SDLoc &dl(Op); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 590e464e1653..e7a692d67ba0 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -274,7 +274,8 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } -void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void HexagonTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerLateLoopOptimizationsEPCallback( [=](LoopPassManager &LPM, OptimizationLevel Level) { LPM.addPass(HexagonLoopIdiomRecognitionPass()); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index dddd79ad1fcf..c5fed0cd65a8 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -34,7 +34,8 @@ public: ~HexagonTargetMachine() override; const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; TargetPassConfig *createPassConfig(PassManagerBase &PM) override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp index 17d7ffb586f4..06de2ff1ae3e 100644 --- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp @@ -166,6 +166,8 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM, // Booleans always contain 0 or 1. setBooleanContents(ZeroOrOneBooleanContent); + + setMaxAtomicSizeInBitsSupported(0); } SDValue LanaiTargetLowering::LowerOperation(SDValue Op, diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp index 039182b3ffe6..33479720183b 100644 --- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -93,6 +93,7 @@ public: return getTM<LanaiTargetMachine>(); } + void addIRPasses() override; bool addInstSelector() override; void addPreSched2() override; void addPreEmitPass() override; @@ -104,6 +105,12 @@ LanaiTargetMachine::createPassConfig(PassManagerBase &PassManager) { return new LanaiPassConfig(*this, &PassManager); } +void LanaiPassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + + TargetPassConfig::addIRPasses(); +} + // Install an instruction selector pass. 
bool LanaiPassConfig::addInstSelector() { addPass(createLanaiISelDag(getLanaiTargetMachine())); diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index 66a37fce5dda..46f63a4103f9 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -121,6 +121,10 @@ class LoongArchAsmParser : public MCTargetAsmParser { // Helper to emit pseudo instruction "li.w/d $rd, $imm". void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "call36 sym" or "tail36 $rj, sym". + void emitFuncCall36(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + bool IsTailCall); + public: enum LoongArchMatchResultTy { Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, @@ -400,6 +404,22 @@ public: IsValidKind; } + bool isSImm20pcaddu18i() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_CALL36; + + return IsConstantImm + ? isInt<20>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + bool isSImm21lsl2() const { if (!isImm()) return false; @@ -1110,6 +1130,35 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, } } +void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, bool IsTailCall) { + // call36 sym + // expands to: + // pcaddu18i $ra, %call36(sym) + // jirl $ra, $ra, 0 + // + // tail36 $rj, sym + // expands to: + // pcaddu18i $rj, %call36(sym) + // jirl $r0, $rj, 0 + unsigned ScratchReg = + IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; + const MCExpr *Sym = + IsTailCall ? Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); + const LoongArchMCExpr *LE = LoongArchMCExpr::create( + Sym, llvm::LoongArchMCExpr::VK_LoongArch_CALL36, getContext()); + + Out.emitInstruction( + MCInstBuilder(LoongArch::PCADDU18I).addReg(ScratchReg).addExpr(LE), + getSTI()); + Out.emitInstruction( + MCInstBuilder(LoongArch::JIRL) + .addReg(IsTailCall ? (unsigned)LoongArch::R0 : ScratchReg) + .addReg(ScratchReg) + .addImm(0), + getSTI()); +} + bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, MCStreamer &Out) { @@ -1158,6 +1207,12 @@ bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case LoongArch::PseudoLI_D: emitLoadImm(Inst, IDLoc, Out); return false; + case LoongArch::PseudoCALL36: + emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/false); + return false; + case LoongArch::PseudoTAIL36: + emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/true); + return false; } Out.emitInstruction(Inst, getSTI()); return false; @@ -1439,6 +1494,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /*Upper=*/(1 << 19) - 1, "operand must be a symbol with modifier (e.g. %pc_hi20) or an integer " "in the range"); + case Match_InvalidSImm20pcaddu18i: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. 
%call36) or an integer " + "in the range"); case Match_InvalidSImm21lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index 72c1f1cec198..ad39658f698e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -62,43 +62,24 @@ private: MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, unsigned SecondOpcode, unsigned FlagsLo); - bool expandLargeAddressLoad(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - unsigned LastOpcode, unsigned IdentifyingMO); - bool expandLargeAddressLoad(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - unsigned LastOpcode, unsigned IdentifyingMO, - const MachineOperand &Symbol, Register DestReg, - bool EraseFromParent); bool expandLoadAddressPcrel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressGot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSLE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSIE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSLD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); + MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSGD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool Large = false); - bool expandFunctionCALL(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool IsTailCall); + MachineBasicBlock::iterator &NextMBBI); }; char LoongArchPreRAExpandPseudo::ID = 0; @@ -131,30 +112,16 @@ bool LoongArchPreRAExpandPseudo::expandMI( switch (MBBI->getOpcode()) { case LoongArch::PseudoLA_PCREL: return expandLoadAddressPcrel(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_PCREL_LARGE: - return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_GOT: return expandLoadAddressGot(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_GOT_LARGE: - return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_LE: return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI); case LoongArch::PseudoLA_TLS_IE: return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_TLS_IE_LARGE: - return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_LD: return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_TLS_LD_LARGE: - return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_GD: return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI); - case LoongArch::PseudoLA_TLS_GD_LARGE: - return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true); - case LoongArch::PseudoCALL: - return expandFunctionCALL(MBB, MBBI, NextMBBI, 
/*IsTailCall=*/false); - case LoongArch::PseudoTAIL: - return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); } return false; } @@ -187,118 +154,9 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair( return true; } -bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, - unsigned IdentifyingMO) { - MachineInstr &MI = *MBBI; - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, - MI.getOperand(2), MI.getOperand(0).getReg(), - true); -} - -bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, - unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, - bool EraseFromParent) { - // Code Sequence: - // - // Part1: pcalau12i $scratch, %MO1(sym) - // Part0: addi.d $dest, $zero, %MO0(sym) - // Part2: lu32i.d $dest, %MO2(sym) - // Part3: lu52i.d $dest, $dest, %MO3(sym) - // Fin: LastOpcode $dest, $dest, $scratch - - unsigned MO0, MO1, MO2, MO3; - switch (IdentifyingMO) { - default: - llvm_unreachable("unsupported identifying MO"); - case LoongArchII::MO_PCREL_LO: - MO0 = IdentifyingMO; - MO1 = LoongArchII::MO_PCREL_HI; - MO2 = LoongArchII::MO_PCREL64_LO; - MO3 = LoongArchII::MO_PCREL64_HI; - break; - case LoongArchII::MO_GOT_PC_HI: - case LoongArchII::MO_LD_PC_HI: - case LoongArchII::MO_GD_PC_HI: - // These cases relocate just like the GOT case, except for Part1. - MO0 = LoongArchII::MO_GOT_PC_LO; - MO1 = IdentifyingMO; - MO2 = LoongArchII::MO_GOT_PC64_LO; - MO3 = LoongArchII::MO_GOT_PC64_HI; - break; - case LoongArchII::MO_IE_PC_LO: - MO0 = IdentifyingMO; - MO1 = LoongArchII::MO_IE_PC_HI; - MO2 = LoongArchII::MO_IE_PC64_LO; - MO3 = LoongArchII::MO_IE_PC64_HI; - break; - } - - MachineFunction *MF = MBB.getParent(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - - assert(MF->getSubtarget<LoongArchSubtarget>().is64Bit() && - "Large code model requires LA64"); - - Register TmpPart1 = - MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); - Register TmpPart0 = - DestReg.isVirtual() - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : DestReg; - Register TmpParts02 = - DestReg.isVirtual() - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : DestReg; - Register TmpParts023 = - DestReg.isVirtual() - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : DestReg; - - auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1); - auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0) - .addReg(LoongArch::R0); - auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpParts02) - // "rj" is needed due to InstrInfo pattern requirement. 
- .addReg(TmpPart0, RegState::Kill); - auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpParts023) - .addReg(TmpParts02, RegState::Kill); - BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) - .addReg(TmpParts023) - .addReg(TmpPart1, RegState::Kill); - - if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { - const char *SymName = Symbol.getSymbolName(); - Part0.addExternalSymbol(SymName, MO0); - Part1.addExternalSymbol(SymName, MO1); - Part2.addExternalSymbol(SymName, MO2); - Part3.addExternalSymbol(SymName, MO3); - } else { - Part0.addDisp(Symbol, 0, MO0); - Part1.addDisp(Symbol, 0, MO1); - Part2.addDisp(Symbol, 0, MO2); - Part3.addDisp(Symbol, 0, MO3); - } - - if (EraseFromParent) - MI.eraseFromParent(); - - return true; -} - bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%pc` family of - // relocs. - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, - LoongArchII::MO_PCREL_LO); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %pc_hi20(sym) // addi.w/d $rd, $rd, %pc_lo12(sym) @@ -311,13 +169,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( bool LoongArchPreRAExpandPseudo::expandLoadAddressGot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%got_pc` family - // of relocs, loading the result from GOT with `ldx.d` in the end. - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, - LoongArchII::MO_GOT_PC_HI); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %got_pc_hi20(sym) // ld.w/d $rd, $rd, %got_pc_lo12(sym) @@ -378,13 +230,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE( bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%ie_pc` family - // of relocs, loading the result with `ldx.d` in the end. - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, - LoongArchII::MO_IE_PC_LO); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %ie_pc_hi20(sym) // ld.w/d $rd, $rd, %ie_pc_lo12(sym) @@ -397,13 +243,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%got_pc` family - // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. 
- return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, - LoongArchII::MO_LD_PC_HI); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %ld_pc_hi20(sym) // addi.w/d $rd, $rd, %got_pc_lo12(sym) @@ -416,13 +256,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool Large) { - if (Large) - // Emit the 5-insn large address load sequence with the `%got_pc` family - // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. - return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, - LoongArchII::MO_GD_PC_HI); - + MachineBasicBlock::iterator &NextMBBI) { // Code Sequence: // pcalau12i $rd, %gd_pc_hi20(sym) // addi.w/d $rd, $rd, %got_pc_lo12(sym) @@ -433,88 +267,6 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( SecondOpcode, LoongArchII::MO_GOT_PC_LO); } -bool LoongArchPreRAExpandPseudo::expandFunctionCALL( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { - MachineFunction *MF = MBB.getParent(); - MachineInstr &MI = *MBBI; - DebugLoc DL = MI.getDebugLoc(); - const MachineOperand &Func = MI.getOperand(0); - MachineInstrBuilder CALL; - unsigned Opcode; - - switch (MF->getTarget().getCodeModel()) { - default: - report_fatal_error("Unsupported code model"); - break; - case CodeModel::Small: { - // CALL: - // bl func - // TAIL: - // b func - Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; - CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); - break; - } - case CodeModel::Medium: { - // CALL: - // pcalau12i $ra, %pc_hi20(func) - // jirl $ra, $ra, %pc_lo12(func) - // TAIL: - // pcalau12i $scratch, %pc_hi20(func) - // jirl $r0, $scratch, %pc_lo12(func) - Opcode = - IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; - Register ScratchReg = - IsTailCall - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : LoongArch::R1; - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg); - CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg); - if (Func.isSymbol()) { - const char *FnName = Func.getSymbolName(); - MIB.addExternalSymbol(FnName, LoongArchII::MO_PCREL_HI); - CALL.addExternalSymbol(FnName, LoongArchII::MO_PCREL_LO); - break; - } - assert(Func.isGlobal() && "Expected a GlobalValue at this time"); - const GlobalValue *GV = Func.getGlobal(); - MIB.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_HI); - CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO); - break; - } - case CodeModel::Large: { - // Emit the 5-insn large address load sequence, either directly or - // indirectly in case of going through the GOT, then JIRL_TAIL or - // JIRL_CALL to $addr. - Opcode = - IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; - Register AddrReg = - IsTailCall - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : LoongArch::R1; - - bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); - unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; - unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; - expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, - false); - CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); - break; - } - } - - // Transfer implicit operands. 
- CALL.copyImplicitOps(MI); - - // Transfer MI flags. - CALL.setMIFlags(MI.getFlags()); - - MI.eraseFromParent(); - return true; -} - class LoongArchExpandPseudo : public MachineFunctionPass { public: const LoongArchInstrInfo *TII; @@ -536,6 +288,35 @@ private: MachineBasicBlock::iterator &NextMBBI); bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO, + const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent); + bool expandLoadAddressPcrelLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressGotLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSIELarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSLDLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSGDLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandFunctionCALL(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + bool IsTailCall); }; char LoongArchExpandPseudo::ID = 0; @@ -570,6 +351,24 @@ bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, switch (MBBI->getOpcode()) { case LoongArch::PseudoCopyCFR: return expandCopyCFR(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_PCREL_LARGE: + return expandLoadAddressPcrelLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_GOT_LARGE: + return expandLoadAddressGotLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_IE_LARGE: + return expandLoadAddressTLSIELarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_LD_LARGE: + return expandLoadAddressTLSLDLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_GD_LARGE: + return expandLoadAddressTLSGDLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoCALL: + case LoongArch::PseudoCALL_MEDIUM: + case LoongArch::PseudoCALL_LARGE: + return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); + case LoongArch::PseudoTAIL: + case LoongArch::PseudoTAIL_MEDIUM: + case LoongArch::PseudoTAIL_LARGE: + return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); } return false; @@ -628,6 +427,212 @@ bool LoongArchExpandPseudo::expandCopyCFR( return true; } +bool LoongArchExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO) { + MachineInstr &MI = *MBBI; + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, + MI.getOperand(2), MI.getOperand(0).getReg(), + true); +} + +bool LoongArchExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent) { + // Code Sequence: + // + // Part1: pcalau12i $dst, %MO1(sym) 
+ // Part0: addi.d $t8, $zero, %MO0(sym) + // Part2: lu32i.d $t8, %MO2(sym) + // Part3: lu52i.d $t8, $t8, %MO3(sym) + // Fin: LastOpcode $dst, $t8, $dst + + unsigned MO0, MO1, MO2, MO3; + switch (IdentifyingMO) { + default: + llvm_unreachable("unsupported identifying MO"); + case LoongArchII::MO_PCREL_LO: + MO0 = IdentifyingMO; + MO1 = LoongArchII::MO_PCREL_HI; + MO2 = LoongArchII::MO_PCREL64_LO; + MO3 = LoongArchII::MO_PCREL64_HI; + break; + case LoongArchII::MO_GOT_PC_HI: + case LoongArchII::MO_LD_PC_HI: + case LoongArchII::MO_GD_PC_HI: + // These cases relocate just like the GOT case, except for Part1. + MO0 = LoongArchII::MO_GOT_PC_LO; + MO1 = IdentifyingMO; + MO2 = LoongArchII::MO_GOT_PC64_LO; + MO3 = LoongArchII::MO_GOT_PC64_HI; + break; + case LoongArchII::MO_IE_PC_LO: + MO0 = IdentifyingMO; + MO1 = LoongArchII::MO_IE_PC_HI; + MO2 = LoongArchII::MO_IE_PC64_LO; + MO3 = LoongArchII::MO_IE_PC64_HI; + break; + } + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + Register ScratchReg = LoongArch::R20; // $t8 + + assert(MBB.getParent()->getSubtarget<LoongArchSubtarget>().is64Bit() && + "Large code model requires LA64"); + + auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg); + auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg) + .addReg(LoongArch::R0); + auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg) + // "rj" is needed due to InstrInfo pattern requirement. + .addReg(ScratchReg); + auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg) + .addReg(ScratchReg); + BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) + .addReg(ScratchReg) + .addReg(DestReg); + + if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { + const char *SymName = Symbol.getSymbolName(); + Part0.addExternalSymbol(SymName, MO0); + Part1.addExternalSymbol(SymName, MO1); + Part2.addExternalSymbol(SymName, MO2); + Part3.addExternalSymbol(SymName, MO3); + } else { + Part0.addDisp(Symbol, 0, MO0); + Part1.addDisp(Symbol, 0, MO1); + Part2.addDisp(Symbol, 0, MO2); + Part3.addDisp(Symbol, 0, MO3); + } + + if (EraseFromParent) + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandLoadAddressPcrelLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%pc` family of + // relocs. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_PCREL_LO); +} + +bool LoongArchExpandPseudo::expandLoadAddressGotLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, loading the result from GOT with `ldx.d` in the end. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, + LoongArchII::MO_GOT_PC_HI); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSIELarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%ie_pc` family + // of relocs, loading the result with `ldx.d` in the end. 
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, + LoongArchII::MO_IE_PC_LO); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSLDLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_LD_PC_HI); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSGDLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_GD_PC_HI); +} + +bool LoongArchExpandPseudo::expandFunctionCALL( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { + MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + const MachineOperand &Func = MI.getOperand(0); + MachineInstrBuilder CALL; + unsigned Opcode; + + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model"); + break; + case CodeModel::Small: { + // CALL: + // bl func + // TAIL: + // b func + Opcode = IsTailCall ? LoongArch::PseudoB_TAIL : LoongArch::BL; + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).add(Func); + break; + } + case CodeModel::Medium: { + // CALL: + // pcaddu18i $ra, %call36(func) + // jirl $ra, $ra, 0 + // TAIL: + // pcaddu18i $t8, %call36(func) + // jr $t8 + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register ScratchReg = IsTailCall ? LoongArch::R20 : LoongArch::R1; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); + + CALL = + BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); + + if (Func.isSymbol()) + MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); + else + MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); + break; + } + case CodeModel::Large: { + // Emit the 5-insn large address load sequence, either directly or + // indirectly in case of going through the GOT, then JIRL_TAIL or + // JIRL_CALL to $addr. + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; + + bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); + unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; + unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; + expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, + false); + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); + break; + } + } + + // Transfer implicit operands. + CALL.copyImplicitOps(MI); + + // Transfer MI flags. 
+ CALL.setMIFlags(MI.getFlags()); + + MI.eraseFromParent(); + return true; +} + } // end namespace INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index e14bbadf9ed2..70f782b81270 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -525,8 +525,7 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, if (isa<ConstantSDNode>(Idx) && (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || - EltTy == MVT::f64 || - cast<ConstantSDNode>(Idx)->getZExtValue() < NumElts / 2)) + EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2)) return Op; return SDValue(); @@ -762,12 +761,13 @@ static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, template <class NodeTy> SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, + CodeModel::Model M, bool IsLocal) const { SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); - switch (DAG.getTarget().getCodeModel()) { + switch (M) { default: report_fatal_error("Unsupported code model"); @@ -808,24 +808,35 @@ SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - return getAddr(cast<BlockAddressSDNode>(Op), DAG); + return getAddr(cast<BlockAddressSDNode>(Op), DAG, + DAG.getTarget().getCodeModel()); } SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - return getAddr(cast<JumpTableSDNode>(Op), DAG); + return getAddr(cast<JumpTableSDNode>(Op), DAG, + DAG.getTarget().getCodeModel()); } SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { - return getAddr(cast<ConstantPoolSDNode>(Op), DAG); + return getAddr(cast<ConstantPoolSDNode>(Op), DAG, + DAG.getTarget().getCodeModel()); } SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); - return getAddr(N, DAG, N->getGlobal()->isDSOLocal()); + auto CM = DAG.getTarget().getCodeModel(); + const GlobalValue *GV = N->getGlobal(); + + if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) { + if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel()) + CM = *GCM; + } + + return getAddr(N, DAG, CM, GV->isDSOLocal()); } SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, @@ -1383,28 +1394,28 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG); // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12) - unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm1 = Op2->getAsZExtVal(); int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue(); if (!isUInt<5>(Imm1) || !isInt<12>(Imm2)) return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG); return Op; } case Intrinsic::loongarch_dbar: { - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); return !isUInt<15>(Imm) ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain, DAG.getConstant(Imm, DL, GRLenVT)); } case Intrinsic::loongarch_ibar: { - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); return !isUInt<15>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain, DAG.getConstant(Imm, DL, GRLenVT)); } case Intrinsic::loongarch_break: { - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); return !isUInt<15>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain, @@ -1413,7 +1424,7 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, case Intrinsic::loongarch_movgr2fcsr: { if (!Subtarget.hasBasicF()) return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG); - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); return !isUInt<2>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain, @@ -1422,7 +1433,7 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, Op.getOperand(3))); } case Intrinsic::loongarch_syscall: { - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); return !isUInt<15>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain, @@ -1925,7 +1936,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); return; } - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); if (!isUInt<2>(Imm)) { emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; @@ -1981,7 +1992,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( CSR_CASE(iocsrrd_d); #undef CSR_CASE case Intrinsic::loongarch_csrrd_w: { - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); if (!isUInt<14>(Imm)) { emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; @@ -3381,8 +3392,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { // TODO: Add more target-dependent nodes later. NODE_NAME_CASE(CALL) + NODE_NAME_CASE(CALL_MEDIUM) + NODE_NAME_CASE(CALL_LARGE) NODE_NAME_CASE(RET) NODE_NAME_CASE(TAIL) + NODE_NAME_CASE(TAIL_MEDIUM) + NODE_NAME_CASE(TAIL_LARGE) NODE_NAME_CASE(SLL_W) NODE_NAME_CASE(SRA_W) NODE_NAME_CASE(SRL_W) @@ -4240,15 +4255,31 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + unsigned Op; + switch (DAG.getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model"); + case CodeModel::Small: + Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; + break; + case CodeModel::Medium: + assert(Subtarget.is64Bit() && "Medium code model requires LA64"); + Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; + break; + case CodeModel::Large: + assert(Subtarget.is64Bit() && "Large code model requires LA64"); + Op = IsTailCall ? 
LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; + break; + } if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); + SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); return Ret; } - Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(Op, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 6f8878f9ccd5..72182623b2c3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -27,8 +27,12 @@ enum NodeType : unsigned { // TODO: add more LoongArchISDs CALL, + CALL_MEDIUM, + CALL_LARGE, RET, TAIL, + TAIL_MEDIUM, + TAIL_LARGE, // 32-bit shifts, directly matching the semantics of the named LoongArch // instructions. @@ -250,7 +254,8 @@ private: LoongArchCCAssignFn Fn) const; template <class NodeTy> - SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; + SDValue getAddr(NodeTy *N, SelectionDAG &DAG, CodeModel::Model M, + bool IsLocal = true) const; SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, unsigned Opc, bool Large = false) const; SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 2fea0f33e9eb..78074c012876 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -69,6 +69,18 @@ def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, def loongarch_tail : SDNode<"LoongArchISD::TAIL", SDT_LoongArchCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def loongarch_call_medium : SDNode<"LoongArchISD::CALL_MEDIUM", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_tail_medium : SDNode<"LoongArchISD::TAIL_MEDIUM", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_call_large : SDNode<"LoongArchISD::CALL_LARGE", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; @@ -377,6 +389,10 @@ def simm20_lu32id : SImm20Operand { let ParserMatchClass = SImmAsmOperand<20, "lu32id">; } +def simm20_pcaddu18i : SImm20Operand { + let ParserMatchClass = SImmAsmOperand<20, "pcaddu18i">; +} + def simm21_lsl2 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<21, "lsl2">; let EncoderMethod = "getImmOpValueAsr<2>"; @@ -832,7 +848,7 @@ def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst), "$rd, $imm20">; } def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>; -def PCADDU18I : ALU_1RI20<0x1e000000, simm20>; +def PCADDU18I : ALU_1RI20<0x1e000000, simm20_pcaddu18i>; def MUL_D : ALU_3R<0x001d8000>; def MULH_D : ALU_3R<0x001e0000>; def MULH_DU : ALU_3R<0x001e8000>; @@ -1395,16 +1411,43 @@ def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), (PseudoBRIND 
GPR:$rj, simm16_lsl2:$imm16)>; +// Function call with 'Small' code model. let isCall = 1, Defs = [R1] in -def PseudoCALL : Pseudo<(outs), (ins simm26_symbol:$func)>; +def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func)>; def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; +// Function call with 'Medium' code model. +let isCall = 1, Defs = [R1, R20], Size = 8 in +def PseudoCALL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$func)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_medium tglobaladdr:$func), + (PseudoCALL_MEDIUM tglobaladdr:$func)>; +def : Pat<(loongarch_call_medium texternalsym:$func), + (PseudoCALL_MEDIUM texternalsym:$func)>; +} // Predicates = [IsLA64] + +// Function call with 'Large' code model. +let isCall = 1, Defs = [R1, R20], Size = 24 in +def PseudoCALL_LARGE: Pseudo<(outs), (ins bare_symbol:$func)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_large tglobaladdr:$func), + (PseudoCALL_LARGE tglobaladdr:$func)>; +def : Pat<(loongarch_call_large texternalsym:$func), + (PseudoCALL_LARGE texternalsym:$func)>; +} // Predicates = [IsLA64] + let isCall = 1, Defs = [R1] in def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), [(loongarch_call GPR:$rj)]>, PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_medium GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; +def : Pat<(loongarch_call_large GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; +} let isCall = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Defs = [R1] in def PseudoJIRL_CALL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, @@ -1415,18 +1458,47 @@ let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, PseudoInstExpansion<(JIRL R0, R1, 0)>; +// Tail call with 'Small' code model. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in -def PseudoTAIL : Pseudo<(outs), (ins simm26_symbol:$dst)>; +def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst)>; def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)), (PseudoTAIL tglobaladdr:$dst)>; def : Pat<(loongarch_tail (iPTR texternalsym:$dst)), (PseudoTAIL texternalsym:$dst)>; +// Tail call with 'Medium' code model. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [R3], Defs = [R20], Size = 8 in +def PseudoTAIL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$dst)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_tail_medium (iPTR tglobaladdr:$dst)), + (PseudoTAIL_MEDIUM tglobaladdr:$dst)>; +def : Pat<(loongarch_tail_medium (iPTR texternalsym:$dst)), + (PseudoTAIL_MEDIUM texternalsym:$dst)>; +} // Predicates = [IsLA64] + +// Tail call with 'Large' code model. 
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + Uses = [R3], Defs = [R19, R20], Size = 24 in +def PseudoTAIL_LARGE : Pseudo<(outs), (ins bare_symbol:$dst)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_tail_large (iPTR tglobaladdr:$dst)), + (PseudoTAIL_LARGE tglobaladdr:$dst)>; +def : Pat<(loongarch_tail_large (iPTR texternalsym:$dst)), + (PseudoTAIL_LARGE texternalsym:$dst)>; +} // Predicates = [IsLA64] + let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj), [(loongarch_tail GPRT:$rj)]>, PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>; +let Predicates = [IsLA64] in { +def : Pat<(loongarch_tail_medium GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; +def : Pat<(loongarch_tail_large GPR:$rj), (PseudoTAILIndirect GPR:$rj)>; +} let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in @@ -1439,6 +1511,19 @@ def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>; +/// call36/taill36 macro instructions +let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, isAsmParserOnly = 1, + Defs = [R1], Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in +def PseudoCALL36 : Pseudo<(outs), (ins bare_symbol:$dst), [], + "call36", "$dst">, + Requires<[IsLA64]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3], + isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8, hasSideEffects = 0, + mayStore = 0, mayLoad = 0 in +def PseudoTAIL36 : Pseudo<(outs), (ins GPR:$tmp, bare_symbol:$dst), [], + "tail36", "$tmp, $dst">, + Requires<[IsLA64]>; + /// Load address (la*) macro instructions. // Define isCodeGenOnly = 0 to expose them to tablegened assembly parser. 
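The PseudoCALL_MEDIUM/PseudoTAIL_MEDIUM pseudos above (and the call36/tail36 assembler macros) each expand to an adjacent pcaddu18i+jirl pair relocated with a single %call36 (R_LARCH_CALL36), extending direct-call reach from the ±128 MiB of a plain bl to roughly ±128 GiB. As a standalone illustration of how such a 36-bit immediate could be split across the pair (a hypothetical helper, not code from this import), the pcaddu18i slot takes bits [37:18] of the offset and the jirl slot takes bits [17:2], with a 1 << 17 bias absorbing the sign extension of the low half:

#include <cassert>
#include <cstdint>
#include <utility>

// Hypothetical helper (illustration only): split a 4-byte-aligned PC-relative
// offset into the two immediates a pcaddu18i+jirl pair encodes under %call36.
static std::pair<int32_t, int32_t> splitCall36Offset(int64_t Offset) {
  assert((Offset & 3) == 0 && "call targets are 4-byte aligned");
  // Bias by 1 << 17 so that the sign-extended low half (jirl's bits [17:2],
  // scaled by 4) lands back on the exact offset.
  int32_t Hi20 = static_cast<int32_t>((Offset + (1 << 17)) >> 18); // pcaddu18i imm20
  int32_t Lo16 = static_cast<int32_t>(
      (Offset - (static_cast<int64_t>(Hi20) << 18)) >> 2);         // jirl imm16
  return {Hi20, Lo16};
}

A linker applying R_LARCH_CALL36 would presumably patch the pcaddu18i imm20 field with Hi20 and the jirl imm16 field with Lo16; the sketch only shows the arithmetic, not the instruction encoding.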
@@ -1451,6 +1536,7 @@ def PseudoLA_ABS_LARGE : Pseudo<(outs GPR:$dst), "la.abs", "$dst, $src">; def PseudoLA_PCREL : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.pcrel", "$dst, $src">; +let Defs = [R20], Size = 20 in def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.pcrel", "$dst, $tmp, $src">, @@ -1462,28 +1548,30 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1 in { def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.got", "$dst, $src">; +def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ie", "$dst, $src">; +def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ld", "$dst, $src">; +def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.gd", "$dst, $src">; +let Defs = [R20], Size = 20 in { def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.got", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.ie", "$dst, $src">; def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.ie", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.ld", "$dst, $src">; def PseudoLA_TLS_LD_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.ld", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.gd", "$dst, $src">; def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.gd", "$dst, $tmp, $src">, Requires<[IsLA64]>; +} // Defs = [R20], Size = 20 } // Load address inst alias: "la", "la.global" and "la.local". diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp index 5daa9481c907..98ad49f25e3f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -95,6 +95,9 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, case LoongArchII::MO_GD_PC_HI: Kind = LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20; break; + case LoongArchII::MO_CALL36: + Kind = LoongArchMCExpr::VK_LoongArch_CALL36; + break; // TODO: Handle more target-flags. } diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp index 257b947a3ce4..092b5f1fb442 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp @@ -15,6 +15,7 @@ #include "LoongArch.h" #include "LoongArchInstrInfo.h" #include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -194,3 +195,25 @@ bool LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); return false; } + +bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const LoongArchFrameLowering *TFI = getFrameLowering(MF); + + // Stack realignment requires a frame pointer. 
If we already started + // register allocation with frame pointer elimination, it is too late now. + if (!MRI->canReserveReg(LoongArch::R22)) + return false; + + // We may also need a base pointer if there are dynamic allocas or stack + // pointer adjustments around calls. + if (TFI->hasReservedCallFrame(MF)) + return true; + + // A base pointer is required and allowed. Check that it isn't too late to + // reserve it. + return MRI->canReserveReg(LoongArchABI::getBPReg()); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h index 7e8f26b14097..d1e40254c297 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h @@ -51,6 +51,7 @@ struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { return true; } + bool canRealignStack(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index a5a4d78aceee..62ae1dea00d6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -63,11 +63,11 @@ getEffectiveLoongArchCodeModel(const Triple &TT, switch (*CM) { case CodeModel::Small: - case CodeModel::Medium: return *CM; + case CodeModel::Medium: case CodeModel::Large: if (!TT.isArch64Bit()) - report_fatal_error("Large code model requires LA64"); + report_fatal_error("Medium/Large code model requires LA64"); return *CM; default: report_fatal_error( diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index 6d8ef1bf96cb..518f6b10edab 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -91,6 +91,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: case FK_Data_8: + case FK_Data_leb128: return Value; case LoongArch::fixup_loongarch_b16: { if (!isInt<18>(Value)) @@ -128,6 +129,15 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } } +static void fixupLeb128(MCContext &Ctx, const MCFixup &Fixup, + MutableArrayRef<char> Data, uint64_t Value) { + unsigned I; + for (I = 0; I != Data.size() && Value; ++I, Value >>= 7) + Data[I] |= uint8_t(Value & 0x7f); + if (Value) + Ctx.reportError(Fixup.getLoc(), "Invalid uleb128 value!"); +} + void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, @@ -143,6 +153,10 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, MCFixupKindInfo Info = getFixupKindInfo(Kind); MCContext &Ctx = Asm.getContext(); + // Fixup leb128 separately. + if (Fixup.getTargetKind() == FK_Data_leb128) + return fixupLeb128(Ctx, Fixup, Data, Value); + // Apply any target-specific value adjustments. 
Value = adjustFixupValue(Fixup, Value, Ctx); @@ -173,6 +187,7 @@ bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, case FK_Data_2: case FK_Data_4: case FK_Data_8: + case FK_Data_leb128: return !Target.isAbsolute(); } } @@ -202,9 +217,24 @@ getRelocPairForSize(unsigned Size) { return std::make_pair( MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD64), MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB64)); + case 128: + return std::make_pair( + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_ADD_ULEB128), + MCFixupKind(FirstLiteralRelocationKind + ELF::R_LARCH_SUB_ULEB128)); } } +std::pair<bool, bool> LoongArchAsmBackend::relaxLEB128(MCLEBFragment &LF, + MCAsmLayout &Layout, + int64_t &Value) const { + const MCExpr &Expr = LF.getValue(); + if (LF.isSigned() || !Expr.evaluateKnownAbsolute(Value, Layout)) + return std::make_pair(false, false); + LF.getFixups().push_back( + MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc())); + return std::make_pair(true, true); +} + bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { // We mostly follow binutils' convention here: align to 4-byte boundary with a @@ -226,21 +256,27 @@ bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, uint64_t &FixedValue) const { std::pair<MCFixupKind, MCFixupKind> FK; uint64_t FixedValueA, FixedValueB; - const MCSection &SecA = Target.getSymA()->getSymbol().getSection(); - const MCSection &SecB = Target.getSymB()->getSymbol().getSection(); - - // We need record relocation if SecA != SecB. Usually SecB is same as the - // section of Fixup, which will be record the relocation as PCRel. If SecB - // is not same as the section of Fixup, it will report error. Just return - // false and then this work can be finished by handleFixup. - if (&SecA != &SecB) - return false; - - // In SecA == SecB case. If the linker relaxation is enabled, we need record - // the ADD, SUB relocations. Otherwise the FixedValue has already been - // calculated out in evaluateFixup, return true and avoid record relocations. - if (!STI.hasFeature(LoongArch::FeatureRelax)) - return true; + const MCSymbol &SA = Target.getSymA()->getSymbol(); + const MCSymbol &SB = Target.getSymB()->getSymbol(); + + bool force = !SA.isInSection() || !SB.isInSection(); + if (!force) { + const MCSection &SecA = SA.getSection(); + const MCSection &SecB = SB.getSection(); + + // We need record relocation if SecA != SecB. Usually SecB is same as the + // section of Fixup, which will be record the relocation as PCRel. If SecB + // is not same as the section of Fixup, it will report error. Just return + // false and then this work can be finished by handleFixup. + if (&SecA != &SecB) + return false; + + // In SecA == SecB case. If the linker relaxation is enabled, we need record + // the ADD, SUB relocations. Otherwise the FixedValue has already been calc- + // ulated out in evaluateFixup, return true and avoid record relocations. 
+ if (!STI.hasFeature(LoongArch::FeatureRelax)) + return true; + } switch (Fixup.getKind()) { case llvm::FK_Data_1: @@ -255,6 +291,9 @@ bool LoongArchAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout, case llvm::FK_Data_8: FK = getRelocPairForSize(64); break; + case llvm::FK_Data_leb128: + FK = getRelocPairForSize(128); + break; default: llvm_unreachable("unsupported fixup size"); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h index fef0e84600a7..71977217f59b 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h @@ -66,6 +66,9 @@ public: void relaxInstruction(MCInst &Inst, const MCSubtargetInfo &STI) const override {} + std::pair<bool, bool> relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, + int64_t &Value) const override; + bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const override; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h index cee6dad1f095..0692cb92b694 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h @@ -47,6 +47,7 @@ enum { MO_IE_PC64_HI, MO_LD_PC_HI, MO_GD_PC_HI, + MO_CALL36 // TODO: Add more flags. }; } // end namespace LoongArchII diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index fe19a4f2d3c8..1dec816f3473 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -90,6 +90,8 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_LARCH_TLS_LE64_LO20; case LoongArch::fixup_loongarch_tls_le64_hi12: return ELF::R_LARCH_TLS_LE64_HI12; + case LoongArch::fixup_loongarch_call36: + return ELF::R_LARCH_CALL36; // TODO: Handle more fixup-kinds. } } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h index 178fa6e5262b..e827bae1f3e3 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h @@ -108,7 +108,10 @@ enum Fixups { // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. fixup_loongarch_tls_gd_hi20, // Generate an R_LARCH_RELAX which indicates the linker may relax here. - fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX + fixup_loongarch_relax = FirstLiteralRelocationKind + ELF::R_LARCH_RELAX, + // 36-bit fixup corresponding to %call36(foo) for a pair instructions: + // pcaddu18i+jirl. 
+ fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36, }; } // end namespace LoongArch } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index d2ea062dc09a..9ac0128f2517 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -241,6 +241,9 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, case LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20: FixupKind = LoongArch::fixup_loongarch_tls_gd_hi20; break; + case LoongArchMCExpr::VK_LoongArch_CALL36: + FixupKind = LoongArch::fixup_loongarch_call36; + break; } } else if (Kind == MCExpr::SymbolRef && cast<MCSymbolRefExpr>(Expr)->getKind() == diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp index 82c992b1cc8c..8ca8876a19b9 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp @@ -138,6 +138,8 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) { return "gd_pc_hi20"; case VK_LoongArch_TLS_GD_HI20: return "gd_hi20"; + case VK_LoongArch_CALL36: + return "call36"; } } @@ -180,6 +182,7 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) { .Case("ld_hi20", VK_LoongArch_TLS_LD_HI20) .Case("gd_pc_hi20", VK_LoongArch_TLS_GD_PC_HI20) .Case("gd_hi20", VK_LoongArch_TLS_GD_HI20) + .Case("call36", VK_LoongArch_CALL36) .Default(VK_LoongArch_Invalid); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h index 93251f824103..bd828116d7fa 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h @@ -61,6 +61,7 @@ public: VK_LoongArch_TLS_LD_HI20, VK_LoongArch_TLS_GD_PC_HI20, VK_LoongArch_TLS_GD_HI20, + VK_LoongArch_CALL36, VK_LoongArch_Invalid // Must be the last item. }; diff --git a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h index a10401ed1a9a..cbe30ec494c9 100644 --- a/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h +++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h @@ -20,7 +20,6 @@ namespace llvm { class M68kSubtarget; -/// This struct provides the information for the target register banks. struct M68kLegalizerInfo : public LegalizerInfo { public: M68kLegalizerInfo(const M68kSubtarget &ST); diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index c4d7a0dec7f3..158393f02a24 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -2375,7 +2375,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 
0 : -1 -> RES = ~setcc_carry if (Cond.getOpcode() == M68kISD::SUB) { - unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); + unsigned CondCode = CC->getAsZExtVal(); if ((CondCode == M68k::COND_CC || CondCode == M68k::COND_CS) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && @@ -2491,7 +2491,7 @@ SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; AddTest = false; } else { - switch (cast<ConstantSDNode>(CC)->getZExtValue()) { + switch (CC->getAsZExtVal()) { default: break; case M68k::COND_VS: diff --git a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 660861a5d521..efb23b1a4e3f 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -308,12 +308,12 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) { switch (VT.getSimpleVT().SimpleTy) { case MVT::i8: - if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1) + if (LD->getOffset()->getAsZExtVal() != 1) return false; break; case MVT::i16: - if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2) + if (LD->getOffset()->getAsZExtVal() != 2) return false; break; diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index d3b59138a5a9..e68904863cfc 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -333,6 +333,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, setMinFunctionAlignment(Align(2)); setPrefFunctionAlignment(Align(2)); + setMaxAtomicSizeInBitsSupported(0); } SDValue MSP430TargetLowering::LowerOperation(SDValue Op, @@ -1168,8 +1169,8 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { bool Invert = false; bool Shift = false; bool Convert = true; - switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) { - default: + switch (TargetCC->getAsZExtVal()) { + default: Convert = false; break; case MSP430CC::COND_HS: @@ -1193,7 +1194,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // C = ~Z for AND instruction, thus we can put Res = ~(SR & 1), however, // Res = (SR >> 1) & 1 is 1 word shorter. break; - } + } EVT VT = Op.getValueType(); SDValue One = DAG.getConstant(1, dl, VT); if (Convert) { diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp index 39e0658eb70d..283de46e57d5 100644 --- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -65,6 +65,7 @@ public: return getTM<MSP430TargetMachine>(); } + void addIRPasses() override; bool addInstSelector() override; void addPreEmitPass() override; }; @@ -81,6 +82,12 @@ MachineFunctionInfo *MSP430TargetMachine::createMachineFunctionInfo( F, STI); } +void MSP430PassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + + TargetPassConfig::addIRPasses(); +} + bool MSP430PassConfig::addInstSelector() { // Install an instruction selector. 
addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel())); diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp index 7fcf375aa10b..192ed1cec79a 100644 --- a/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -492,7 +492,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = GTI.getSequentialElementStride(DL); while (true) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 483eba4e4f47..d431d3d91494 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -2042,8 +2042,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { return Op; SDValue CCNode = CondRes.getOperand(2); - Mips::CondCode CC = - (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue(); + Mips::CondCode CC = (Mips::CondCode)CCNode->getAsZExtVal(); unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T; SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32); SDValue FCC0 = DAG.getRegister(Mips::FCC0, MVT::i32); diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 14f26201e6c0..f5e94235859a 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -330,8 +330,9 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { verify(*ST.getInstrInfo()); } -bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool MipsLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { using namespace TargetOpcode; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.h b/llvm/lib/Target/Mips/MipsLegalizerInfo.h index 05027b718a85..63daebf26470 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.h +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.h @@ -25,7 +25,8 @@ class MipsLegalizerInfo : public LegalizerInfo { public: MipsLegalizerInfo(const MipsSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 0ed87ee0809a..c0e978018919 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -76,7 +76,7 @@ void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, } unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const { - uint64_t RegNum = cast<ConstantSDNode>(RegIdx)->getZExtValue(); + uint64_t RegNum = RegIdx->getAsZExtVal(); return Mips::MSACtrlRegClass.getRegister(RegNum); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 815c46edb6fa..7abe984b34e1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -2076,7 +2076,7 @@ bool 
NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { VTs = CurDAG->getVTList(EVTs); } - unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); + unsigned OffsetVal = Offset->getAsZExtVal(); SmallVector<SDValue, 2> Ops; Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); @@ -2091,7 +2091,7 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { SDLoc DL(N); SDValue Chain = N->getOperand(0); SDValue Offset = N->getOperand(1); - unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); + unsigned OffsetVal = Offset->getAsZExtVal(); MemSDNode *Mem = cast<MemSDNode>(N); // How many elements do we have? @@ -2158,9 +2158,9 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) { SDLoc DL(N); SDValue Chain = N->getOperand(0); SDValue Param = N->getOperand(1); - unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue(); + unsigned ParamVal = Param->getAsZExtVal(); SDValue Offset = N->getOperand(2); - unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); + unsigned OffsetVal = Offset->getAsZExtVal(); MemSDNode *Mem = cast<MemSDNode>(N); SDValue Glue = N->getOperand(N->getNumOperands() - 1); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index e8f36bf50a1b..c65090d915ef 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -854,6 +854,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, computeRegisterProperties(STI.getRegisterInfo()); setMinCmpXchgSizeInBits(32); + setMaxAtomicSizeInBitsSupported(64); } const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -5811,7 +5812,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); // Get the intrinsic ID - unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); + unsigned IntrinNo = Intrin.getNode()->getAsZExtVal(); switch (IntrinNo) { default: return; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 8d895762fbe1..fad69f5e80a7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -225,7 +225,8 @@ void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) { AAM.registerFunctionAnalysis<NVPTXAA>(); } -void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { +void NVPTXTargetMachine::registerPassBuilderCallbacks( + PassBuilder &PB, bool PopulateClassToPassNames) { PB.registerPipelineParsingCallback( [](StringRef PassName, FunctionPassManager &PM, ArrayRef<PassBuilder::PipelineElement>) { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index cfdd8da9b765..9e6bf929badb 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -69,7 +69,8 @@ public: void registerDefaultAliasAnalyses(AAManager &AAM) override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool PopulateClassToPassNames) override; TargetTransformInfo getTargetTransformInfo(const Function &F) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index c73721da46e3..7aa63f9fc0c9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -180,10 +180,6 @@ static Instruction 
*simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { return {Intrinsic::ceil, FTZ_MustBeOn}; case Intrinsic::nvvm_fabs_d: return {Intrinsic::fabs, FTZ_Any}; - case Intrinsic::nvvm_fabs_f: - return {Intrinsic::fabs, FTZ_MustBeOff}; - case Intrinsic::nvvm_fabs_ftz_f: - return {Intrinsic::fabs, FTZ_MustBeOn}; case Intrinsic::nvvm_floor_d: return {Intrinsic::floor, FTZ_Any}; case Intrinsic::nvvm_floor_f: @@ -264,12 +260,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { return {Intrinsic::minimum, FTZ_MustBeOff, true}; case Intrinsic::nvvm_fmin_ftz_nan_f16x2: return {Intrinsic::minimum, FTZ_MustBeOn, true}; - case Intrinsic::nvvm_round_d: - return {Intrinsic::round, FTZ_Any}; - case Intrinsic::nvvm_round_f: - return {Intrinsic::round, FTZ_MustBeOff}; - case Intrinsic::nvvm_round_ftz_f: - return {Intrinsic::round, FTZ_MustBeOn}; case Intrinsic::nvvm_sqrt_rn_d: return {Intrinsic::sqrt, FTZ_Any}; case Intrinsic::nvvm_sqrt_f: @@ -278,10 +268,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are // the versions with explicit ftz-ness. return {Intrinsic::sqrt, FTZ_Any}; - case Intrinsic::nvvm_sqrt_rn_f: - return {Intrinsic::sqrt, FTZ_MustBeOff}; - case Intrinsic::nvvm_sqrt_rn_ftz_f: - return {Intrinsic::sqrt, FTZ_MustBeOn}; case Intrinsic::nvvm_trunc_d: return {Intrinsic::trunc, FTZ_Any}; case Intrinsic::nvvm_trunc_f: @@ -316,24 +302,8 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { return {Instruction::UIToFP}; // NVVM intrinsics that map to LLVM binary ops. - case Intrinsic::nvvm_add_rn_d: - return {Instruction::FAdd, FTZ_Any}; - case Intrinsic::nvvm_add_rn_f: - return {Instruction::FAdd, FTZ_MustBeOff}; - case Intrinsic::nvvm_add_rn_ftz_f: - return {Instruction::FAdd, FTZ_MustBeOn}; - case Intrinsic::nvvm_mul_rn_d: - return {Instruction::FMul, FTZ_Any}; - case Intrinsic::nvvm_mul_rn_f: - return {Instruction::FMul, FTZ_MustBeOff}; - case Intrinsic::nvvm_mul_rn_ftz_f: - return {Instruction::FMul, FTZ_MustBeOn}; case Intrinsic::nvvm_div_rn_d: return {Instruction::FDiv, FTZ_Any}; - case Intrinsic::nvvm_div_rn_f: - return {Instruction::FDiv, FTZ_MustBeOff}; - case Intrinsic::nvvm_div_rn_ftz_f: - return {Instruction::FDiv, FTZ_MustBeOn}; // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but // need special handling. @@ -342,10 +312,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // as well. case Intrinsic::nvvm_rcp_rn_d: return {SPC_Reciprocal, FTZ_Any}; - case Intrinsic::nvvm_rcp_rn_f: - return {SPC_Reciprocal, FTZ_MustBeOff}; - case Intrinsic::nvvm_rcp_rn_ftz_f: - return {SPC_Reciprocal, FTZ_MustBeOn}; // We do not currently simplify intrinsics that give an approximate // answer. These include: diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 42f5a4e624c4..56af80f9cede 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -350,7 +350,7 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = GTI.getSequentialElementStride(DL); for (;;) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. 
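Editorial aside on the two mechanical migrations visible in the Mips and PowerPC FastISel hunks above, and repeated throughout this import: immediate values of constant SelectionDAG nodes are now read with getAsZExtVal() instead of going through cast<ConstantSDNode>(...)->getZExtValue(), and GEP-based address computation asks the type iterator for its per-index stride via getSequentialElementStride() instead of DL.getTypeAllocSize(GTI.getIndexedType()). The sketch below is illustrative only and is not part of the imported patch; the helper names readConstantImm and sequentialStride are invented here to show the new idioms in isolation.

// Illustrative sketch only -- not part of the vendor import.
// readConstantImm and sequentialStride are hypothetical helpers.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"

using namespace llvm;

// Old idiom:  cast<ConstantSDNode>(N)->getZExtValue()
// New idiom:  N->getAsZExtVal()  (still expects N to be a ConstantSDNode)
static uint64_t readConstantImm(SDNode *N) { return N->getAsZExtVal(); }

// Old idiom:  DL.getTypeAllocSize(GTI.getIndexedType())
// New idiom:  GTI.getSequentialElementStride(DL)
static uint64_t sequentialStride(gep_type_iterator GTI, const DataLayout &DL) {
  return GTI.getSequentialElementStride(DL);
}

Both changes are refactors rather than behavioral fixes: getAsZExtVal() folds the cast boilerplate into one call, and getSequentialElementStride() lets the stride query go through the GEP type iterator itself, which is what the FastISel address-computation loops above now rely on.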
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index ed96339240d9..26ed74108ec3 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -565,7 +565,7 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { /// operand. If so Imm will receive the 32-bit value. static bool isInt32Immediate(SDNode *N, unsigned &Imm) { if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { - Imm = cast<ConstantSDNode>(N)->getZExtValue(); + Imm = N->getAsZExtVal(); return true; } return false; @@ -575,7 +575,7 @@ static bool isInt32Immediate(SDNode *N, unsigned &Imm) { /// operand. If so Imm will receive the 64-bit value. static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { - Imm = cast<ConstantSDNode>(N)->getZExtValue(); + Imm = N->getAsZExtVal(); return true; } return false; @@ -1500,7 +1500,7 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); // Get 64 bit value. - int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); + int64_t Imm = N->getAsZExtVal(); if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { uint64_t SextImm = SignExtend64(Imm, MinSize); SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); @@ -4923,7 +4923,7 @@ bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) { SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc, MVT::i1, DecrementOps); - unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); + unsigned Val = RHS->getAsZExtVal(); bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn; @@ -5765,7 +5765,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { break; // If the multiplier fits int16, we can handle it with mulli. - int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); + int64_t Imm = Op1->getAsZExtVal(); unsigned Shift = llvm::countr_zero<uint64_t>(Imm); if (isInt<16>(Imm) || !Shift) break; @@ -6612,8 +6612,7 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { // For us to materialize these using one instruction, we must be able to // represent them as signed 16-bit integers. 
- uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), - False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); + uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal(); if (!isInt<16>(True) || !isInt<16>(False)) break; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8f27e6677afa..235df1880b37 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2566,7 +2566,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (LeadingZero) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef - int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); + int Val = UniquedVals[Multiple - 1]->getAsZExtVal(); if (Val < 16) // 0,0,0,4 -> vspltisw(4) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } @@ -2635,11 +2635,11 @@ bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { if (!isa<ConstantSDNode>(N)) return false; - Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue(); + Imm = (int16_t)N->getAsZExtVal(); if (N->getValueType(0) == MVT::i32) - return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + return Imm == (int32_t)N->getAsZExtVal(); else - return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); + return Imm == (int64_t)N->getAsZExtVal(); } bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); @@ -2684,7 +2684,7 @@ bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) { if (!isa<ConstantSDNode>(N)) return false; - Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); + Imm = (int64_t)N->getAsZExtVal(); return isInt<34>(Imm); } bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) { @@ -15580,7 +15580,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR) break; - uint64_t Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + uint64_t Imm = Op2->getAsZExtVal(); // Make sure that the constant is narrow enough to fit in the narrow type. 
if (!isUInt<32>(Imm)) break; @@ -16795,7 +16795,7 @@ void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I, return; if (!isa<ConstantSDNode>(Ops[1].getNode())) return; - auto IntrinsicID = cast<ConstantSDNode>(Ops[1].getNode())->getZExtValue(); + auto IntrinsicID = Ops[1].getNode()->getAsZExtVal(); if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw && IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap) return; @@ -18430,7 +18430,7 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent, if (Flags & PPC::MOF_RPlusSImm16) { SDValue Op0 = N.getOperand(0); SDValue Op1 = N.getOperand(1); - int16_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); + int16_t Imm = Op1->getAsZExtVal(); if (!Align || isAligned(*Align, Imm)) { Disp = DAG.getTargetConstant(Imm, DL, N.getValueType()); Base = Op0; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 375e63654db1..8a37e40414ee 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -270,12 +270,15 @@ def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>; // Link register def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>; -//let Aliases = [LR] in -def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>; +def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]> { + let Aliases = [LR]; +} // Count register def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>; -def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>; +def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]> { + let Aliases = [CTR]; +} // VRsave register def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>; diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 4759aa951664..d616aaeddf41 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -466,10 +466,6 @@ public: bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; } - bool isGPRF64AsFPR() const { return isGPR() && Reg.IsGPRAsFPR; } - - bool isGPRPF64AsFPR() const { return isGPR() && Reg.IsGPRAsFPR; } - static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) { @@ -2039,9 +2035,8 @@ ParseStatus RISCVAsmParser::parseCallSymbol(OperandVector &Operands) { SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size()); - RISCVMCExpr::VariantKind Kind = RISCVMCExpr::VK_RISCV_CALL; - if (Identifier.consume_back("@plt")) - Kind = RISCVMCExpr::VK_RISCV_CALL_PLT; + RISCVMCExpr::VariantKind Kind = RISCVMCExpr::VK_RISCV_CALL_PLT; + (void)Identifier.consume_back("@plt"); MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp index 50ed85acdec0..697ad476ff8c 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp @@ -579,7 +579,7 @@ bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Select the recommended relocation type R_RISCV_CALL_PLT. 
if (!Info.Callee.isReg()) - Info.Callee.setTargetFlags(RISCVII::MO_PLT); + Info.Callee.setTargetFlags(RISCVII::MO_CALL); MachineInstrBuilder Call = MIRBuilder diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 079906d1958c..ab8070772fe5 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -113,7 +113,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower(); auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP); - if (ST.hasStdExtZbb()) + if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen); else BSWAPActions.maxScalar(0, sXLen).lower(); @@ -411,8 +411,9 @@ bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI, return true; } -bool RISCVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool RISCVLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; GISelChangeObserver &Observer = Helper.Observer; switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index 48c36976501f..f3ec6be16734 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -21,7 +21,6 @@ class GISelChangeObserver; class MachineIRBuilder; class RISCVSubtarget; -/// This class provides the information for the target register banks. class RISCVLegalizerInfo : public LegalizerInfo { const RISCVSubtarget &STI; const unsigned XLen; @@ -30,7 +29,8 @@ class RISCVLegalizerInfo : public LegalizerInfo { public: RISCVLegalizerInfo(const RISCVSubtarget &ST); - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override; diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp index aba2511959af..8d97c5ffd20a 100644 --- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp +++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp @@ -186,30 +186,37 @@ RISCVInstrumentManager::createInstruments(const MCInst &Inst) { } static std::pair<uint8_t, uint8_t> -getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL, - uint8_t SEW) { +getEEWAndEMUL(unsigned Opcode, RISCVII::VLMUL LMUL, uint8_t SEW) { uint8_t EEW; switch (Opcode) { case RISCV::VLM_V: case RISCV::VSM_V: case RISCV::VLE8_V: case RISCV::VSE8_V: + case RISCV::VLSE8_V: + case RISCV::VSSE8_V: EEW = 8; break; case RISCV::VLE16_V: case RISCV::VSE16_V: + case RISCV::VLSE16_V: + case RISCV::VSSE16_V: EEW = 16; break; case RISCV::VLE32_V: case RISCV::VSE32_V: + case RISCV::VLSE32_V: + case RISCV::VSSE32_V: EEW = 32; break; case RISCV::VLE64_V: case RISCV::VSE64_V: + case RISCV::VLSE64_V: + case RISCV::VSSE64_V: EEW = 64; break; default: - llvm_unreachable("Opcode is not a vector unit stride load nor store"); + llvm_unreachable("Could not determine EEW from Opcode"); } auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW); @@ -218,6 +225,18 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL, return 
std::make_pair(EEW, *EMUL); } +bool opcodeHasEEWAndEMULInfo(unsigned short Opcode) { + return Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V || + Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V || + Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V || + Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V || + Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V || + Opcode == RISCV::VLSE8_V || Opcode == RISCV::VSSE8_V || + Opcode == RISCV::VLSE16_V || Opcode == RISCV::VSSE16_V || + Opcode == RISCV::VLSE32_V || Opcode == RISCV::VSSE32_V || + Opcode == RISCV::VLSE64_V || Opcode == RISCV::VSSE64_V; +} + unsigned RISCVInstrumentManager::getSchedClassID( const MCInstrInfo &MCII, const MCInst &MCI, const llvm::SmallVector<Instrument *> &IVec) const { @@ -249,13 +268,9 @@ unsigned RISCVInstrumentManager::getSchedClassID( uint8_t SEW = SI ? SI->getSEW() : 0; const RISCVVInversePseudosTable::PseudoInfo *RVV = nullptr; - if (Opcode == RISCV::VLM_V || Opcode == RISCV::VSM_V || - Opcode == RISCV::VLE8_V || Opcode == RISCV::VSE8_V || - Opcode == RISCV::VLE16_V || Opcode == RISCV::VSE16_V || - Opcode == RISCV::VLE32_V || Opcode == RISCV::VSE32_V || - Opcode == RISCV::VLE64_V || Opcode == RISCV::VSE64_V) { + if (opcodeHasEEWAndEMULInfo(Opcode)) { RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(LMUL); - auto [EEW, EMUL] = getEEWAndEMULForUnitStrideLoadStore(Opcode, VLMUL, SEW); + auto [EEW, EMUL] = getEEWAndEMUL(Opcode, VLMUL, SEW); RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, EMUL, EEW); } else { // Check if it depends on LMUL and SEW diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 716fb67c5824..7ce08eabdeb6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -329,16 +329,17 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF, return true; } -bool RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, - int64_t &Value) const { +std::pair<bool, bool> RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, + MCAsmLayout &Layout, + int64_t &Value) const { if (LF.isSigned()) - return false; + return std::make_pair(false, false); const MCExpr &Expr = LF.getValue(); if (ULEB128Reloc) { LF.getFixups().push_back( MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc())); } - return Expr.evaluateKnownAbsolute(Value, Layout); + return std::make_pair(Expr.evaluateKnownAbsolute(Value, Layout), false); } // Given a compressed control flow instruction this function returns diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 2ad6534ac8bc..902b44bba70f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -100,8 +100,8 @@ public: bool &WasRelaxed) const override; bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout, bool &WasRelaxed) const override; - bool relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, - int64_t &Value) const override; + std::pair<bool, bool> relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout, + int64_t &Value) const override; bool writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index c32210fc1419..433e2e6f80bd 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -254,7 +254,6 @@ static inline bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc) { enum { MO_None = 0, MO_CALL = 1, - MO_PLT = 2, MO_LO = 3, MO_HI = 4, MO_PCREL_LO = 5, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp index d67351102bc1..64ddae61b1bc 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp @@ -41,8 +41,6 @@ void RISCVMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { if (HasVariant) OS << '%' << getVariantKindName(getKind()) << '('; Expr->print(OS, MAI); - if (Kind == VK_RISCV_CALL_PLT) - OS << "@plt"; if (HasVariant) OS << ')'; } diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 0fd514fa87cd..f2bd5118fc07 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -747,9 +747,6 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, Kind = RISCVMCExpr::VK_RISCV_None; break; case RISCVII::MO_CALL: - Kind = RISCVMCExpr::VK_RISCV_CALL; - break; - case RISCVII::MO_PLT: Kind = RISCVMCExpr::VK_RISCV_CALL_PLT; break; case RISCVII::MO_LO: diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 24a13f93af88..103a2e2da7b9 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -109,6 +109,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandRV32ZdinxStore(MBB, MBBI); case RISCV::PseudoRV32ZdinxLD: return expandRV32ZdinxLoad(MBB, MBBI); + case RISCV::PseudoCCMOVGPRNoX0: case RISCV::PseudoCCMOVGPR: case RISCV::PseudoCCADD: case RISCV::PseudoCCSUB: @@ -134,6 +135,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoCCSLLIW: case RISCV::PseudoCCSRLIW: case RISCV::PseudoCCSRAIW: + case RISCV::PseudoCCANDN: + case RISCV::PseudoCCORN: + case RISCV::PseudoCCXNOR: return expandCCOp(MBB, MBBI, NextMBBI); case RISCV::PseudoVSETVLI: case RISCV::PseudoVSETVLIX0: @@ -191,7 +195,8 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, Register DestReg = MI.getOperand(0).getReg(); assert(MI.getOperand(4).getReg() == DestReg); - if (MI.getOpcode() == RISCV::PseudoCCMOVGPR) { + if (MI.getOpcode() == RISCV::PseudoCCMOVGPR || + MI.getOpcode() == RISCV::PseudoCCMOVGPRNoX0) { // Add MV. 
BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg) .add(MI.getOperand(5)) @@ -225,6 +230,9 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, case RISCV::PseudoCCSLLIW: NewOpc = RISCV::SLLIW; break; case RISCV::PseudoCCSRLIW: NewOpc = RISCV::SRLIW; break; case RISCV::PseudoCCSRAIW: NewOpc = RISCV::SRAIW; break; + case RISCV::PseudoCCANDN: NewOpc = RISCV::ANDN; break; + case RISCV::PseudoCCORN: NewOpc = RISCV::ORN; break; + case RISCV::PseudoCCXNOR: NewOpc = RISCV::XNOR; break; } BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg) .add(MI.getOperand(5)) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 59b202606dad..bb7a3291085d 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1021,6 +1021,12 @@ def TuneShortForwardBranchOpt def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">; def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">; +def TuneConditionalCompressedMoveFusion + : SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion", + "true", "Enable branch+c.mv fusion">; +def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">; +def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">; + def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", "SiFive 7-Series processors", [TuneNoDefaultUnroll, diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp index 5ad1e082344e..1129206800ad 100644 --- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp @@ -362,7 +362,7 @@ RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr, VecOperand = i; - TypeSize TS = DL->getTypeAllocSize(GTI.getIndexedType()); + TypeSize TS = GTI.getSequentialElementStride(*DL); if (TS.isScalable()) return std::make_pair(nullptr, nullptr); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index bfa3bf3cc74e..0d8688ba2eae 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -763,14 +763,12 @@ bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { return false; EVT LoadVT = Ld->getMemoryVT(); - bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC); - bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC); + assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) && + "Unexpected addressing mode"); + bool IsPre = AM == ISD::PRE_INC; + bool IsPost = AM == ISD::POST_INC; int64_t Offset = C->getSExtValue(); - // Convert decrements to increments by a negative quantity. - if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC) - Offset = -Offset; - // The constants that can be encoded in the THeadMemIdx instructions // are of the form (sign_extend(imm5) << imm2). 
int64_t Shift; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03a59f8a8b57..0a1a466af591 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -814,8 +814,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, Custom); setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); - setOperationAction( - {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); + setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, + ISD::SSUBSAT, ISD::USUBSAT}, + VT, Legal); // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" // nodes which truncate by one power of two at a time. @@ -1184,9 +1185,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); - setOperationAction( - {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, - Custom); + setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, + ISD::SSUBSAT, ISD::USUBSAT}, + VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); @@ -1350,8 +1351,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.hasVendorXTHeadMemIdx()) { - for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC; - ++im) { + for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) { setIndexedLoadAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedLoadAction(im, MVT::i16, Legal); @@ -1374,8 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, - ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, - ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, + ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -2711,11 +2711,19 @@ InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const { return getLMULCost(VT); } -/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction +/// Return the cost of a vslidedown.vx or vslideup.vx instruction +/// for the type VT. (This does not cover the vslide1up or vslide1down +/// variants.) Slides may be linear in the number of vregs implied by LMUL, +/// or may track the vrgather.vv cost. It is implementation-dependent. +InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const { + return getLMULCost(VT); +} + +/// Return the cost of a vslidedown.vi or vslideup.vi instruction /// for the type VT. (This does not cover the vslide1up or vslide1down /// variants.) Slides may be linear in the number of vregs implied by LMUL, /// or may track the vrgather.vv cost. It is implementation-dependent. 
-InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const { +InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const { return getLMULCost(VT); } @@ -2811,8 +2819,8 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, SDValue SplatZero = DAG.getNode( RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT), DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL); - Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero, - Res, VL); + Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero, + Res, DAG.getUNDEF(DstContainerVT), VL); if (DstVT.isFixedLengthVector()) Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget); @@ -3489,7 +3497,7 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, for (unsigned I = 0; I < NumElts;) { SDValue V = Op.getOperand(I); - bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue(); + bool BitValue = !V.isUndef() && V->getAsZExtVal(); Bits |= ((uint64_t)BitValue << BitPos); ++BitPos; ++I; @@ -3620,8 +3628,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, for (const auto &OpIdx : enumerate(Op->op_values())) { const auto &SeqV = OpIdx.value(); if (!SeqV.isUndef()) - SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask) - << (OpIdx.index() * EltBitSize)); + SplatValue |= + ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize)); } // On RV64, sign-extend from 32 to 64 bits where possible in order to @@ -3650,10 +3658,10 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, // would require bit-manipulation instructions to construct the splat value. SmallVector<SDValue> Sequence; const auto *BV = cast<BuildVectorSDNode>(Op); - if (VT.isInteger() && EltBitSize < 64 && + if (VT.isInteger() && EltBitSize < Subtarget.getELen() && ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && BV->getRepeatedSequence(Sequence) && - (Sequence.size() * EltBitSize) <= 64) { + (Sequence.size() * EltBitSize) <= Subtarget.getELen()) { unsigned SeqLen = Sequence.size(); MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen); assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 || @@ -3676,8 +3684,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, // vector type. 
for (const auto &SeqV : Sequence) { if (!SeqV.isUndef()) - SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask) - << (EltIdx * EltBitSize)); + SplatValue |= + ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize)); EltIdx++; } @@ -3938,8 +3946,7 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, (isa<RegisterSDNode>(VL) && cast<RegisterSDNode>(VL)->getReg() == RISCV::X0)) NewVL = DAG.getRegister(RISCV::X0, MVT::i32); - else if (isa<ConstantSDNode>(VL) && - isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue())) + else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal())) NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL); if (NewVL) { @@ -5401,8 +5408,8 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), {X, X, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(ContainerVT), Mask, VL}); - NewY = - DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL); + NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X, + DAG.getUNDEF(ContainerVT), VL); } SDValue NewX = X; @@ -5410,8 +5417,8 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), {Y, Y, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(ContainerVT), Mask, VL}); - NewX = - DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL); + NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y, + DAG.getUNDEF(ContainerVT), VL); } unsigned Opc = @@ -5458,6 +5465,7 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(UADDSAT) OP_CASE(SSUBSAT) OP_CASE(USUBSAT) + OP_CASE(AVGFLOORU) OP_CASE(FADD) OP_CASE(FSUB) OP_CASE(FMUL) @@ -5528,7 +5536,6 @@ static unsigned getRISCVVLOp(SDValue Op) { return RISCVISD::VMXOR_VL; return RISCVISD::XOR_VL; case ISD::VP_SELECT: - return RISCVISD::VSELECT_VL; case ISD::VP_MERGE: return RISCVISD::VMERGE_VL; case ISD::VP_ASHR: @@ -6453,6 +6460,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, !Subtarget.hasVInstructionsF16())) return SplitVectorOp(Op, DAG); [[fallthrough]]; + case ISD::AVGFLOORU: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -6914,7 +6922,7 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, MVT VT = N->getSimpleValueType(0); SDLoc DL(N); - if (!Subtarget.hasShortForwardBranchOpt()) { + if (!Subtarget.hasConditionalMoveFusion()) { // (select c, -1, y) -> -c | y if (isAllOnesConstant(TrueV)) { SDValue Neg = DAG.getNegative(CondV, DL, VT); @@ -7078,7 +7086,7 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c)) // Unless we have the short forward branch optimization. 
- if (!Subtarget.hasShortForwardBranchOpt()) + if (!Subtarget.hasConditionalMoveFusion()) return DAG.getNode( ISD::OR, DL, VT, DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), @@ -7456,8 +7464,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, DAG.getUNDEF(ContainerVT), SplatZero, VL); SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), SplatTrueVal, VL); - SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, - SplatTrueVal, SplatZero, VL); + SDValue Select = + DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal, + SplatZero, DAG.getUNDEF(ContainerVT), VL); return convertFromScalableVector(VecVT, Select, DAG, Subtarget); } @@ -7906,8 +7915,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, // Use tail agnostic policy if Idx is the last index of Vec. unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) && - cast<ConstantSDNode>(Idx)->getZExtValue() + 1 == - VecVT.getVectorNumElements()) + Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements()) Policy = RISCVII::TAIL_AGNOSTIC; SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, Idx, Mask, InsertVL, Policy); @@ -8167,7 +8175,7 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const auto [MinVLMAX, MaxVLMAX] = RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget); - uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue(); + uint64_t AVLInt = AVL->getAsZExtVal(); if (AVLInt <= MinVLMAX) { I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); } else if (AVLInt >= 2 * MaxVLMAX) { @@ -8233,15 +8241,14 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, SDValue Mask = Operands[NumOps - 3]; SDValue MaskedOff = Operands[1]; // Assume Policy operand is the last operand. - uint64_t Policy = - cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue(); + uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal(); // We don't need to select maskedoff if it's undef. if (MaskedOff.isUndef()) return Vec; // TAMU if (Policy == RISCVII::TAIL_AGNOSTIC) - return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, - AVL); + return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff, + DAG.getUNDEF(VT), AVL); // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. // It's fine because vmerge does not care mask policy. 
return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff, @@ -8489,8 +8496,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ), DAG.getUNDEF(MaskVT), Mask, VL}); - return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal, - Vec, VL); + return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal, + Vec, DAG.getUNDEF(VT), VL); } // EGS * EEW >= 128 bits case Intrinsic::riscv_vaesdf_vv: @@ -10243,8 +10250,8 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV( SDLoc DL(Op); SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; - SDValue Select = - DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL); + SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1, + Op2, DAG.getUNDEF(ContainerVT), VL); return convertFromScalableVector(VT, Select, DAG, Subtarget); } @@ -10327,9 +10334,14 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const { Ops.push_back(DAG.getUNDEF(ContainerVT)); } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index()) { - // For VP_MERGE, copy the false operand instead of an undef value. - assert(Op.getOpcode() == ISD::VP_MERGE); - Ops.push_back(Ops.back()); + if (Op.getOpcode() == ISD::VP_MERGE) { + // For VP_MERGE, copy the false operand instead of an undef value. + Ops.push_back(Ops.back()); + } else { + assert(Op.getOpcode() == ISD::VP_SELECT); + // For VP_SELECT, add an undef value. + Ops.push_back(DAG.getUNDEF(ContainerVT)); + } } } // Pass through operands which aren't fixed-length vectors. @@ -10379,8 +10391,8 @@ SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op, SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), SplatValue, VL); - SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src, - Splat, ZeroSplat, VL); + SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat, + ZeroSplat, DAG.getUNDEF(ContainerVT), VL); if (!VT.isFixedLengthVector()) return Result; return convertFromScalableVector(VT, Result, DAG, Subtarget); @@ -10508,8 +10520,8 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT); SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT), One, VL); - Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat, - ZeroSplat, VL); + Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat, + ZeroSplat, DAG.getUNDEF(IntVT), VL); } else if (DstEltSize > (2 * SrcEltSize)) { // Widen before converting. 
MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2), @@ -10633,8 +10645,8 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), DAG.getConstant(0, DL, XLenVT), EVL1); - Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op1, SplatOneOp1, - SplatZeroOp1, EVL1); + Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1, + SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1); SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), @@ -10642,8 +10654,8 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT), DAG.getConstant(0, DL, XLenVT), EVL2); - Op2 = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Op2, SplatOneOp2, - SplatZeroOp2, EVL2); + Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2, + SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2); } int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue(); @@ -10713,8 +10725,8 @@ RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, DAG.getUNDEF(IndicesVT), DAG.getConstant(0, DL, XLenVT), EVL); - Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, IndicesVT, Op1, SplatOne, - SplatZero, EVL); + Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne, + SplatZero, DAG.getUNDEF(IndicesVT), EVL); } unsigned EltSize = GatherVT.getScalarSizeInBits(); @@ -12197,7 +12209,7 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, if (VT.isVector()) return SDValue(); - if (!Subtarget.hasShortForwardBranchOpt()) { + if (!Subtarget.hasConditionalMoveFusion()) { // (select cond, x, (and x, c)) has custom lowering with Zicond. if ((!Subtarget.hasStdExtZicond() && !Subtarget.hasVendorXVentanaCondOps()) || @@ -12850,9 +12862,9 @@ struct CombineResult; /// Helper class for folding sign/zero extensions. /// In particular, this class is used for the following combines: -/// add | add_vl -> vwadd(u) | vwadd(u)_w -/// sub | sub_vl -> vwsub(u) | vwsub(u)_w -/// mul | mul_vl -> vwmul(u) | vwmul_su +/// add_vl -> vwadd(u) | vwadd(u)_w +/// sub_vl -> vwsub(u) | vwsub(u)_w +/// mul_vl -> vwmul(u) | vwmul_su /// /// An object of this class represents an operand of the operation we want to /// combine. @@ -12897,8 +12909,6 @@ struct NodeExtensionHelper { /// E.g., for zext(a), this would return a. SDValue getSource() const { switch (OrigOperand.getOpcode()) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return OrigOperand.getOperand(0); @@ -12915,8 +12925,7 @@ struct NodeExtensionHelper { /// Get or create a value that can feed \p Root with the given extension \p /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. /// \see ::getSource(). - SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget, + SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, std::optional<bool> SExt) const { if (!SExt.has_value()) return OrigOperand; @@ -12931,10 +12940,8 @@ struct NodeExtensionHelper { // If we need an extension, we should be changing the type. 
SDLoc DL(Root); - auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); + auto [Mask, VL] = getMaskAndVL(Root); switch (OrigOperand.getOpcode()) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); @@ -12974,15 +12981,12 @@ struct NodeExtensionHelper { /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { - case ISD::ADD: case RISCVISD::ADD_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; - case ISD::MUL: case RISCVISD::MUL_VL: return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; - case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -12995,8 +12999,7 @@ struct NodeExtensionHelper { /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> /// newOpcode(a, b). static unsigned getSUOpcode(unsigned Opcode) { - assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && - "SU is only supported for MUL"); + assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); return RISCVISD::VWMULSU_VL; } @@ -13004,10 +13007,8 @@ struct NodeExtensionHelper { /// newOpcode(a, b). static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { - case ISD::ADD: case RISCVISD::ADD_VL: return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; - case ISD::SUB: case RISCVISD::SUB_VL: return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; default: @@ -13017,33 +13018,19 @@ struct NodeExtensionHelper { using CombineToTry = std::function<std::optional<CombineResult>( SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, - const NodeExtensionHelper & /*RHS*/, SelectionDAG &, - const RISCVSubtarget &)>; + const NodeExtensionHelper & /*RHS*/)>; /// Check if this node needs to be fully folded or extended for all users. bool needToPromoteOtherUsers() const { return EnforceOneUse; } /// Helper method to set the various fields of this struct based on the /// type of \p Root. - void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { SupportsZExt = false; SupportsSExt = false; EnforceOneUse = true; CheckMask = true; - unsigned Opc = OrigOperand.getOpcode(); - switch (Opc) { - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: { - if (OrigOperand.getValueType().isVector()) { - SupportsZExt = Opc == ISD::ZERO_EXTEND; - SupportsSExt = Opc == ISD::SIGN_EXTEND; - SDLoc DL(Root); - MVT VT = Root->getSimpleValueType(0); - std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); - } - break; - } + switch (OrigOperand.getOpcode()) { case RISCVISD::VZEXT_VL: SupportsZExt = true; Mask = OrigOperand.getOperand(1); @@ -13099,16 +13086,8 @@ struct NodeExtensionHelper { } /// Check if \p Root supports any extension folding combines. 
- static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { + static bool isSupportedRoot(const SDNode *Root) { switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.isTypeLegal(Root->getValueType(0))) - return false; - return Root->getValueType(0).isScalableVector(); - } case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: @@ -13123,10 +13102,9 @@ struct NodeExtensionHelper { } /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). - NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " - "unsupported root"); + NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { + assert(isSupportedRoot(Root) && "Trying to build an helper with an " + "unsupported root"); assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); OrigOperand = Root->getOperand(OperandIdx); @@ -13142,7 +13120,7 @@ struct NodeExtensionHelper { SupportsZExt = Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; SupportsSExt = !SupportsZExt; - std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); + std::tie(Mask, VL) = getMaskAndVL(Root); CheckMask = true; // There's no existing extension here, so we don't have to worry about // making sure it gets removed. @@ -13151,7 +13129,7 @@ struct NodeExtensionHelper { } [[fallthrough]]; default: - fillUpExtensionSupport(Root, DAG, Subtarget); + fillUpExtensionSupport(Root, DAG); break; } } @@ -13167,27 +13145,14 @@ struct NodeExtensionHelper { } /// Helper function to get the Mask and VL from \p Root. - static std::pair<SDValue, SDValue> - getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - assert(isSupportedRoot(Root, DAG) && "Unexpected root"); - switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: { - SDLoc DL(Root); - MVT VT = Root->getSimpleValueType(0); - return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); - } - default: - return std::make_pair(Root->getOperand(3), Root->getOperand(4)); - } + static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { + assert(isSupportedRoot(Root) && "Unexpected root"); + return std::make_pair(Root->getOperand(3), Root->getOperand(4)); } /// Check if the Mask and VL of this operand are compatible with \p Root. - bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) const { - auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); + bool areVLAndMaskCompatible(const SDNode *Root) const { + auto [Mask, VL] = getMaskAndVL(Root); return isMaskCompatible(Mask) && isVLCompatible(VL); } @@ -13195,14 +13160,11 @@ struct NodeExtensionHelper { /// foldings that are supported by this class. static bool isCommutative(const SDNode *N) { switch (N->getOpcode()) { - case ISD::ADD: - case ISD::MUL: case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return true; - case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13247,25 +13209,14 @@ struct CombineResult { /// Return a value that uses TargetOpcode and that can be used to replace /// Root. /// The actual replacement is *not* done in that method. 
- SDValue materialize(SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) const { + SDValue materialize(SelectionDAG &DAG) const { SDValue Mask, VL, Merge; - std::tie(Mask, VL) = - NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); - switch (Root->getOpcode()) { - default: - Merge = Root->getOperand(2); - break; - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: - Merge = DAG.getUNDEF(Root->getValueType(0)); - break; - } + std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); + Merge = Root->getOperand(2); return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), - LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), - RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), - Merge, Mask, VL); + LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), + RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, + Mask, VL); } }; @@ -13282,16 +13233,15 @@ struct CombineResult { static std::optional<CombineResult> canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS, bool AllowSExt, - bool AllowZExt, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + bool AllowZExt) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); - if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || - !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/false, RHS, + /*SExtRHS=*/false); if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/true), @@ -13308,10 +13258,9 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/true, DAG, Subtarget); + /*AllowZExt=*/true); } /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) @@ -13320,9 +13269,8 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + const NodeExtensionHelper &RHS) { + if (!RHS.areVLAndMaskCompatible(Root)) return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar @@ -13346,10 +13294,9 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. 
static std::optional<CombineResult> canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/false, DAG, Subtarget); + /*AllowZExt=*/false); } /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) @@ -13358,10 +13305,9 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { + const NodeExtensionHelper &RHS) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, - /*AllowZExt=*/true, DAG, Subtarget); + /*AllowZExt=*/true); } /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) @@ -13370,13 +13316,10 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - + const NodeExtensionHelper &RHS) { if (!LHS.SupportsSExt || !RHS.SupportsZExt) return std::nullopt; - if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || - !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) + if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); @@ -13386,8 +13329,6 @@ SmallVector<NodeExtensionHelper::CombineToTry> NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { SmallVector<CombineToTry> Strategies; switch (Root->getOpcode()) { - case ISD::ADD: - case ISD::SUB: case RISCVISD::ADD_VL: case RISCVISD::SUB_VL: // add|sub -> vwadd(u)|vwsub(u) @@ -13395,7 +13336,6 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { // add|sub -> vwadd(u)_w|vwsub(u)_w Strategies.push_back(canFoldToVW_W); break; - case ISD::MUL: case RISCVISD::MUL_VL: // mul -> vwmul(u) Strategies.push_back(canFoldToVWWithSameExtension); @@ -13426,14 +13366,12 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { /// mul_vl -> vwmul(u) | vwmul_su /// vwadd_w(u) -> vwadd(u) /// vwub_w(u) -> vwadd(u) -static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const RISCVSubtarget &Subtarget) { +static SDValue +combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - if (!NodeExtensionHelper::isSupportedRoot(N, DAG)) - return SDValue(); - + assert(NodeExtensionHelper::isSupportedRoot(N) && + "Shouldn't have called this method"); SmallVector<SDNode *> Worklist; SmallSet<SDNode *, 8> Inserted; Worklist.push_back(N); @@ -13442,11 +13380,11 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, while (!Worklist.empty()) { SDNode *Root = Worklist.pop_back_val(); - if (!NodeExtensionHelper::isSupportedRoot(Root, DAG)) + if (!NodeExtensionHelper::isSupportedRoot(Root)) return SDValue(); - NodeExtensionHelper LHS(N, 0, DAG, Subtarget); - NodeExtensionHelper RHS(N, 1, DAG, Subtarget); + NodeExtensionHelper LHS(N, 0, DAG); + NodeExtensionHelper RHS(N, 1, DAG); auto AppendUsersIfNeeded = [&Worklist, &Inserted](const 
NodeExtensionHelper &Op) { if (Op.needToPromoteOtherUsers()) { @@ -13473,8 +13411,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, for (NodeExtensionHelper::CombineToTry FoldingStrategy : FoldingStrategies) { - std::optional<CombineResult> Res = - FoldingStrategy(N, LHS, RHS, DAG, Subtarget); + std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); if (Res) { Matched = true; CombinesToApply.push_back(*Res); @@ -13503,7 +13440,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; ValuesToReplace.reserve(CombinesToApply.size()); for (CombineResult Res : CombinesToApply) { - SDValue NewValue = Res.materialize(DAG, Subtarget); + SDValue NewValue = Res.materialize(DAG); if (!InputRootReplacement) { assert(Res.Root == N && "First element is expected to be the current node"); @@ -14503,7 +14440,7 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, if (SDValue V = useInversedSetcc(N, DAG, Subtarget)) return V; - if (Subtarget.hasShortForwardBranchOpt()) + if (Subtarget.hasConditionalMoveFusion()) return SDValue(); SDValue TrueVal = N->getOperand(1); @@ -14775,20 +14712,13 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - - assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); - - if (N->getValueType(0).isFixedLengthVector()) - return SDValue(); - + assert(N->getOpcode() == RISCVISD::ADD_VL); SDValue Addend = N->getOperand(0); SDValue MulOp = N->getOperand(1); + SDValue AddMergeOp = N->getOperand(2); - if (N->getOpcode() == RISCVISD::ADD_VL) { - SDValue AddMergeOp = N->getOperand(2); - if (!AddMergeOp.isUndef()) - return SDValue(); - } + if (!AddMergeOp.isUndef()) + return SDValue(); auto IsVWMulOpc = [](unsigned Opc) { switch (Opc) { @@ -14812,16 +14742,8 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, if (!MulMergeOp.isUndef()) return SDValue(); - auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - if (N->getOpcode() == ISD::ADD) { - SDLoc DL(N); - return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, - Subtarget); - } - return std::make_pair(N->getOperand(3), N->getOperand(4)); - }(N, DAG, Subtarget); - + SDValue AddMask = N->getOperand(3); + SDValue AddVL = N->getOperand(4); SDValue MulMask = MulOp.getOperand(3); SDValue MulVL = MulOp.getOperand(4); @@ -15087,18 +15009,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::AND, DL, VT, NewFMV, DAG.getConstant(~SignBit, DL, VT)); } - case ISD::ADD: { - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; - if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) - return V; + case ISD::ADD: return performADDCombine(N, DAG, Subtarget); - } - case ISD::SUB: { - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; + case ISD::SUB: return performSUBCombine(N, DAG, Subtarget); - } case ISD::AND: return performANDCombine(N, DCI, Subtarget); case ISD::OR: @@ -15106,8 +15020,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return performXORCombine(N, DAG, Subtarget); case ISD::MUL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) - return V; return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: @@ -15266,7 +15178,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return 
DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), {LHS, RHS, CC, TrueV, FalseV}); - if (!Subtarget.hasShortForwardBranchOpt()) { + if (!Subtarget.hasConditionalMoveFusion()) { // (select c, -1, y) -> -c | y if (isAllOnesConstant(TrueV)) { SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal); @@ -15584,7 +15496,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } case RISCVISD::ADD_VL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) return V; return combineToVWMACC(N, DAG, Subtarget); case RISCVISD::SUB_VL: @@ -15593,7 +15505,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: case RISCVISD::MUL_VL: - return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); + return combineBinOp_VLToVWBinOp_VL(N, DCI); case RISCVISD::VFMADD_VL: case RISCVISD::VFNMADD_VL: case RISCVISD::VFMSUB_VL: @@ -18303,20 +18215,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // split it and then direct call can be matched by PseudoCALL. if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = S->getGlobal(); - - unsigned OpFlags = RISCVII::MO_CALL; - if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) - OpFlags = RISCVII::MO_PLT; - - Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - unsigned OpFlags = RISCVII::MO_CALL; - - if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), - nullptr)) - OpFlags = RISCVII::MO_PLT; - - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL); } // The first call operand is the chain and the second is the target address. @@ -18694,6 +18595,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(UDIV_VL) NODE_NAME_CASE(UREM_VL) NODE_NAME_CASE(XOR_VL) + NODE_NAME_CASE(AVGFLOORU_VL) NODE_NAME_CASE(SADDSAT_VL) NODE_NAME_CASE(UADDSAT_VL) NODE_NAME_CASE(SSUBSAT_VL) @@ -18783,7 +18685,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VWMACCSU_VL) NODE_NAME_CASE(VNSRL_VL) NODE_NAME_CASE(SETCC_VL) - NODE_NAME_CASE(VSELECT_VL) NODE_NAME_CASE(VMERGE_VL) NODE_NAME_CASE(VMAND_VL) NODE_NAME_CASE(VMOR_VL) @@ -19357,7 +19258,6 @@ bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, - bool &IsInc, SelectionDAG &DAG) const { // Target does not support indexed loads. if (!Subtarget.hasVendorXTHeadMemIdx()) @@ -19384,7 +19284,6 @@ bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, if (!isLegalIndexedOffset) return false; - IsInc = (Op->getOpcode() == ISD::ADD); Offset = Op->getOperand(1); return true; } @@ -19407,11 +19306,10 @@ bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, } else return false; - bool IsInc; - if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG)) + if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG)) return false; - AM = IsInc ? 
ISD::PRE_INC : ISD::PRE_DEC; + AM = ISD::PRE_INC; return true; } @@ -19431,15 +19329,14 @@ bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } else return false; - bool IsInc; - if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG)) + if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG)) return false; // Post-indexing updates the base, so it's not a valid transform // if that's not the same as the load's pointer. if (Ptr != Base) return false; - AM = IsInc ? ISD::POST_INC : ISD::POST_DEC; + AM = ISD::POST_INC; return true; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 58ed611efc83..5d51fe168b04 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -253,6 +253,9 @@ enum NodeType : unsigned { SSUBSAT_VL, USUBSAT_VL, + // Averaging adds of unsigned integers. + AVGFLOORU_VL, + MULHS_VL, MULHU_VL, FADD_VL, @@ -330,9 +333,8 @@ enum NodeType : unsigned { // operand is VL. SETCC_VL, - // Vector select with an additional VL operand. This operation is unmasked. - VSELECT_VL, // General vmerge node with mask, true, false, passthru, and vl operands. + // Tail agnostic vselect can be implemented by setting passthru to undef. VMERGE_VL, // Mask binary operators. @@ -526,7 +528,8 @@ public: InstructionCost getVRGatherVVCost(MVT VT) const; InstructionCost getVRGatherVICost(MVT VT) const; - InstructionCost getVSlideCost(MVT VT) const; + InstructionCost getVSlideVXCost(MVT VT) const; + InstructionCost getVSlideVICost(MVT VT) const; // Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -774,8 +777,7 @@ public: bool isVScaleKnownToBeAPowerOfTwo() const override; bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, bool &IsInc, - SelectionDAG &DAG) const; + ISD::MemIndexedMode &AM, SelectionDAG &DAG) const; bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; @@ -903,6 +905,7 @@ private: SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUnsignedAvgFloor(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 3400b24e0abb..e591aa935c0b 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1381,6 +1381,11 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { if (!UnavailablePred || !AvailableInfo.isValid()) return; + // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of + // the unavailable pred. + if (AvailableInfo.hasSEWLMULRatioOnly()) + return; + // Critical edge - TODO: consider splitting? 
if (UnavailablePred->succ_size() != 1) return; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index cd98438eed88..351f48c1708e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1346,6 +1346,10 @@ unsigned getPredicatedOpcode(unsigned Opcode) { case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break; case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break; case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break; + + case RISCV::ANDN: return RISCV::PseudoCCANDN; break; + case RISCV::ORN: return RISCV::PseudoCCORN; break; + case RISCV::XNOR: return RISCV::PseudoCCXNOR; break; } return RISCV::INSTRUCTION_LIST_END; @@ -2365,7 +2369,6 @@ RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { using namespace RISCVII; static const std::pair<unsigned, const char *> TargetFlags[] = { {MO_CALL, "riscv-call"}, - {MO_PLT, "riscv-plt"}, {MO_LO, "riscv-lo"}, {MO_HI, "riscv-hi"}, {MO_PCREL_LO, "riscv-pcrel-lo"}, @@ -2651,6 +2654,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, case RISCV::TH_MULSH: // Operands 2 and 3 are commutable. return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); + case RISCV::PseudoCCMOVGPRNoX0: case RISCV::PseudoCCMOVGPR: // Operands 4 and 5 are commutable. return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); @@ -2807,6 +2811,7 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, OpIdx2); } + case RISCV::PseudoCCMOVGPRNoX0: case RISCV::PseudoCCMOVGPR: { // CCMOV can be commuted by inverting the condition. auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 35e8edf5d2fa..792e0bbdf581 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -729,22 +729,6 @@ def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", "">, let imm12 = 0b110000000000; } -let Predicates = [HasStdExtZawrs] in { -def WRS_NTO : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "wrs.nto", "">, - Sched<[]> { - let rs1 = 0; - let rd = 0; - let imm12 = 0b000000001101; -} - -def WRS_STO : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "wrs.sto", "">, - Sched<[]> { - let rs1 = 0; - let rd = 0; - let imm12 = 0b000000011101; -} -} // Predicates = [HasStdExtZawrs] - } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 def CSRRW : CSR_ir<0b001, "csrrw">; @@ -1387,6 +1371,24 @@ def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst), ReadSFBALU, ReadSFBALU]>; } +// This should always expand to a branch+c.mv so the size is 6 or 4 if the +// branch is compressible. +let Predicates = [HasConditionalMoveFusion, NoShortForwardBranchOpt], + Constraints = "$dst = $falsev", isCommutable = 1, Size = 6 in { +// This instruction moves $truev to $dst when the condition is true. It will +// be expanded to control flow in RISCVExpandPseudoInsts. +// We use GPRNoX0 because c.mv cannot encode X0. +def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPRNoX0:$falsev, GPRNoX0:$truev), + [(set GPRNoX0:$dst, + (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), + (XLenVT GPR:$rhs), + cond, (XLenVT GPRNoX0:$truev), + (XLenVT GPRNoX0:$falsev)))]>, + Sched<[]>; +} + // Conditional binops, that updates update $dst to (op rs1, rs2) when condition // is true. Returns $falsev otherwise. Selected by optimizeSelect. 
// TODO: Can we use DefaultOperands on the regular binop to accomplish this more @@ -1517,6 +1519,23 @@ def PseudoCCSRAIW : Pseudo<(outs GPR:$dst), GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; + +// Zbb/Zbkb instructions +def PseudoCCANDN : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, + ReadSFBALU, ReadSFBALU, ReadSFBALU]>; +def PseudoCCORN : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, + ReadSFBALU, ReadSFBALU, ReadSFBALU]>; +def PseudoCCXNOR : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, + ReadSFBALU, ReadSFBALU, ReadSFBALU]>; } multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> { @@ -1535,7 +1554,7 @@ multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> { (IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>; } -let Predicates = [NoShortForwardBranchOpt] in +let Predicates = [NoConditionalMoveFusion] in defm Select_GPR : SelectCC_GPR_rrirr<GPR, XLenVT>; class SelectCompressOpt<CondCode Cond> @@ -2095,6 +2114,7 @@ include "RISCVInstrInfoM.td" // Atomic include "RISCVInstrInfoA.td" +include "RISCVInstrInfoZa.td" // Scalar FP include "RISCVInstrInfoF.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index c8301fcc6b93..4d0567e41abc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -7,8 +7,7 @@ //===----------------------------------------------------------------------===// // // This file describes the RISC-V instructions from the standard 'A', Atomic -// Instructions extension as well as the experimental 'Zacas' (Atomic -// Compare-and-Swap) extension. +// Instructions extension. 
// //===----------------------------------------------------------------------===// @@ -96,15 +95,6 @@ defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">, Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; } // Predicates = [HasStdExtA, IsRV64] -let Predicates = [HasStdExtZacas] in { -defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">; -defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">; -} // Predicates = [HasStdExtZacas] - -let Predicates = [HasStdExtZacas, IsRV64] in { -defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">; -} // Predicates = [HasStdExtZacas, IsRV64] - //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 6af710049a9d..418421b2a556 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -36,11 +36,13 @@ def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">; def GPRPF64AsFPR : AsmOperandClass { let Name = "GPRPF64AsFPR"; let ParserMethod = "parseGPRAsFPR"; + let PredicateMethod = "isGPRAsFPR"; let RenderMethod = "addRegOperands"; } def GPRF64AsFPR : AsmOperandClass { let Name = "GPRF64AsFPR"; + let PredicateMethod = "isGPRAsFPR"; let ParserMethod = "parseGPRAsFPR"; let RenderMethod = "addRegOperands"; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 30deeaa06448..fcb18b67623e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -6719,12 +6719,14 @@ defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">; // 15.2. Vector mask population count vcpop //===----------------------------------------------------------------------===// +let IsSignExtendingOpW = 1 in defm PseudoVCPOP: VPseudoVPOP_M; //===----------------------------------------------------------------------===// // 15.3. vfirst find-first-set mask bit //===----------------------------------------------------------------------===// +let IsSignExtendingOpW = 1 in defm PseudoVFIRST: VPseudoV1ST_M; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index b7c845703794..4f87c36506e5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -1131,6 +1131,22 @@ defm : VPatBinarySDNode_VV_VX_VI<uaddsat, "PseudoVSADDU">; defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">; defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">; +// 12.2. Vector Single-Width Averaging Add and Subtract +foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, + 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; + def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2)))), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, + 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; + } +} + // 15. Vector Mask Instructions // 15.1. 
Vector Mask-Register Logical Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 5b50a4a78c01..d60ff4b5fab0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -111,6 +111,7 @@ def riscv_ctlz_vl : SDNode<"RISCVISD::CTLZ_VL", SDT_RISCVIntUnOp_VL> def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>; def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>; +def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>; @@ -338,13 +339,6 @@ def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL", SDTCisSameNumEltsAs<0, 4>, SDTCisVT<5, XLenVT>]>>; -def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [ - SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>, - SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisVT<4, XLenVT> -]>; - -def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>; - def SDT_RISCVVMERGE_VL : SDTypeProfile<1, 5, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameAs<0, 4>, @@ -1722,21 +1716,21 @@ multiclass VPatMultiplyAccVL_VV_VX<PatFrag op, string instruction_name> { (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vselect_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, undef, VLOpFrag), (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vselect_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, undef, VLOpFrag), (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -1861,17 +1855,17 @@ multiclass VPatFPMulAccVL_VV_VF<PatFrag vop, string instruction_name> { (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vselect_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, undef, VLOpFrag), (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : 
Pat<(riscv_vselect_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, undef, VLOpFrag), (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -1905,10 +1899,10 @@ multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> { // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vselect_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, undef, VLOpFrag), (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), @@ -1916,10 +1910,10 @@ multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> { // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vselect_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask V0), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, VLOpFrag), + vti.RegClass:$rd, undef, VLOpFrag), (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), @@ -2255,31 +2249,6 @@ foreach vtiTowti = AllWidenableIntVectors in { // 11.15. Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - vti.RegClass:$rs1, - vti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0), - GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (SplatPat XLenVT:$rs1), - vti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; - - def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0), - (SplatPat_simm5 simm5:$rs1), - vti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), vti.RegClass:$rs1, vti.RegClass:$rs2, @@ -2338,6 +2307,24 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">; defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">; defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">; +// 12.2. 
Vector Single-Width Averaging Add and Subtract +foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2))), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, + (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + // 12.5. Vector Narrowing Fixed-Point Clip Instructions class VPatTruncSatClipMaxMinBase<string inst, VTypeInfo vti, @@ -2534,33 +2521,6 @@ foreach fvti = AllFloatVectors in { // 13.15. Vector Floating-Point Merge Instruction defvar ivti = GetIntVTypeInfo<fvti>.Vti; let Predicates = GetVTypePredicates<ivti>.Predicates in { - def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), - fvti.RegClass:$rs1, - fvti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), - GPR:$vl, fvti.Log2SEW)>; - - def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), - (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), - fvti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, - GPR:$imm, - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; - - def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), - (SplatFPOp (fvti.Scalar fpimm0)), - fvti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), fvti.RegClass:$rs1, fvti.RegClass:$rs2, @@ -2571,6 +2531,16 @@ foreach fvti = AllFloatVectors in { GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), + fvti.RegClass:$rs2, + fvti.RegClass:$merge, + VLOpFrag)), + (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX) + fvti.RegClass:$merge, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + + + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2, fvti.RegClass:$merge, @@ -2581,16 +2551,6 @@ foreach fvti = AllFloatVectors in { } let Predicates = GetVTypePredicates<fvti>.Predicates in { - def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0), - (SplatFPOp fvti.ScalarRegClass:$rs1), - fvti.RegClass:$rs2, - VLOpFrag)), - (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, - (fvti.Scalar fvti.ScalarRegClass:$rs1), - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td new file mode 100644 index 000000000000..a09f5715b24f --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -0,0 +1,44 @@ +//===-- RISCVInstrInfoZa.td - RISC-V Atomic instructions ---*- 
tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard atomic 'Za*' +// extensions: +// - Zawrs (v1.0) : Wait-on-Reservation-Set. +// - Zacas (v1.0-rc1) : Atomic Compare-and-Swap. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Zacas (Atomic Compare-and-Swap) +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZacas] in { +defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">; +defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">; +} // Predicates = [HasStdExtZacas] + +let Predicates = [HasStdExtZacas, IsRV64] in { +defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">; +} // Predicates = [HasStdExtZacas, IsRV64] + +//===----------------------------------------------------------------------===// +// Zawrs (Wait-on-Reservation-Set) +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class WRSInst<bits<12> funct12, string opcodestr> + : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), opcodestr, ""> { + let rs1 = 0; + let rd = 0; + let imm12 = funct12; +} + +let Predicates = [HasStdExtZawrs] in { +def WRS_NTO : WRSInst<0b000000001101, "wrs.nto">, Sched<[]>; +def WRS_STO : WRSInst<0b000000011101, "wrs.sto">, Sched<[]>; +} // Predicates = [HasStdExtZawrs] diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 2c2b34bb5b77..c16eee67f3c5 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -126,7 +126,11 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI, if (MI->getNumExplicitDefs() != 1) return false; - for (auto &UserOp : MRI.use_nodbg_operands(MI->getOperand(0).getReg())) { + Register DestReg = MI->getOperand(0).getReg(); + if (!DestReg.isVirtual()) + return false; + + for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) { const MachineInstr *UserMI = UserOp.getParent(); unsigned OpIdx = UserOp.getOperandNo(); diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index ba8996e710ed..52800f086129 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -232,7 +232,8 @@ def SIFIVE_P450 : RISCVProcessorModel<"sifive-p450", NoSchedModel, FeatureStdExtZba, FeatureStdExtZbb, FeatureStdExtZbs, - FeatureStdExtZfhmin]>; + FeatureStdExtZfhmin], + [TuneConditionalCompressedMoveFusion]>; def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base", SyntacoreSCR1Model, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 840fd149d681..a59d058382fe 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -487,7 +487,7 @@ defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t, defvar VM1VTs = [vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t, vbfloat16m1_t, vfloat16m1_t, vfloat32m1_t, vfloat64m1_t, vint8mf2_t, vint8mf4_t, vint8mf8_t, - vint16mf2_t, vint16mf4_t, vint32mf2_t, + vint16mf2_t, 
vint16mf4_t, vint32mf2_t, vfloat16mf4_t, vfloat16mf2_t, vbfloat16mf4_t, vbfloat16mf2_t, vfloat32mf2_t]; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 26320b05d9be..2ba93764facd 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -150,6 +150,13 @@ public: bool hasHalfFPLoadStoreMove() const { return HasStdExtZfhmin || HasStdExtZfbfmin; } + + bool hasConditionalMoveFusion() const { + // Do we support fusing a branch+mv or branch+c.mv as a conditional move. + return (hasConditionalCompressedMoveFusion() && hasStdExtCOrZca()) || + hasShortForwardBranchOpt(); + } + bool is64Bit() const { return IsRV64; } MVT getXLenVT() const { return is64Bit() ? MVT::i64 : MVT::i32; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 4614446b2150..b3916c987005 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -34,6 +34,65 @@ static cl::opt<unsigned> SLPMaxVF( "exclusively by SLP vectorizer."), cl::Hidden); +InstructionCost +RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT, + TTI::TargetCostKind CostKind) { + size_t NumInstr = OpCodes.size(); + if (CostKind == TTI::TCK_CodeSize) + return NumInstr; + InstructionCost LMULCost = TLI->getLMULCost(VT); + if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency)) + return LMULCost * NumInstr; + InstructionCost Cost = 0; + for (auto Op : OpCodes) { + switch (Op) { + case RISCV::VRGATHER_VI: + Cost += TLI->getVRGatherVICost(VT); + break; + case RISCV::VRGATHER_VV: + Cost += TLI->getVRGatherVVCost(VT); + break; + case RISCV::VSLIDEUP_VI: + case RISCV::VSLIDEDOWN_VI: + Cost += TLI->getVSlideVICost(VT); + break; + case RISCV::VSLIDEUP_VX: + case RISCV::VSLIDEDOWN_VX: + Cost += TLI->getVSlideVXCost(VT); + break; + case RISCV::VREDMAX_VS: + case RISCV::VREDMIN_VS: + case RISCV::VREDMAXU_VS: + case RISCV::VREDMINU_VS: + case RISCV::VREDSUM_VS: + case RISCV::VREDAND_VS: + case RISCV::VREDOR_VS: + case RISCV::VREDXOR_VS: + case RISCV::VFREDMAX_VS: + case RISCV::VFREDMIN_VS: + case RISCV::VFREDUSUM_VS: { + unsigned VL = VT.getVectorMinNumElements(); + if (!VT.isFixedLengthVector()) + VL *= *getVScaleForTuning(); + Cost += Log2_32_Ceil(VL); + break; + } + case RISCV::VFREDOSUM_VS: { + unsigned VL = VT.getVectorMinNumElements(); + if (!VT.isFixedLengthVector()) + VL *= *getVScaleForTuning(); + Cost += VL; + break; + } + case RISCV::VMV_S_X: + // FIXME: VMV_S_X doesn't use LMUL, the cost should be 1 + default: + Cost += LMULCost; + } + } + return Cost; +} + InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy() && @@ -281,7 +340,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // Example sequence: // vnsrl.wi v10, v8, 0 if (equal(DeinterleaveMask, Mask)) - return LT.first * TLI->getLMULCost(LT.second); + return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI, + LT.second, CostKind); } } } @@ -292,7 +352,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, LT.second.getVectorNumElements() <= 256)) { VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext()); InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); - return IndexCost + TLI->getVRGatherVVCost(LT.second); + return IndexCost + + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, 
CostKind); } [[fallthrough]]; } @@ -310,7 +371,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC); InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind); InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind); - return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost; + return 2 * IndexCost + + getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV}, + LT.second, CostKind) + + MaskCost; } [[fallthrough]]; } @@ -365,19 +429,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // Example sequence: // vsetivli zero, 4, e8, mf2, tu, ma (ignored) // vslidedown.vi v8, v9, 2 - return LT.first * TLI->getVSlideCost(LT.second); + return LT.first * + getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind); case TTI::SK_InsertSubvector: // Example sequence: // vsetivli zero, 4, e8, mf2, tu, ma (ignored) // vslideup.vi v8, v9, 2 - return LT.first * TLI->getVSlideCost(LT.second); + return LT.first * + getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind); case TTI::SK_Select: { // Example sequence: // li a0, 90 // vsetivli zero, 8, e8, mf2, ta, ma (ignored) // vmv.s.x v0, a0 // vmerge.vvm v8, v9, v8, v0 - return LT.first * 3 * TLI->getLMULCost(LT.second); + return LT.first * + (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM}, + LT.second, CostKind)); } case TTI::SK_Broadcast: { bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) == @@ -389,7 +458,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // vsetivli zero, 2, e8, mf8, ta, ma (ignored) // vmv.v.x v8, a0 // vmsne.vi v0, v8, 0 - return LT.first * TLI->getLMULCost(LT.second) * 3; + return LT.first * + (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI}, + LT.second, CostKind)); } // Example sequence: // vsetivli zero, 2, e8, mf8, ta, mu (ignored) @@ -400,24 +472,38 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // vmv.v.x v8, a0 // vmsne.vi v0, v8, 0 - return LT.first * TLI->getLMULCost(LT.second) * 6; + return LT.first * + (TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi + TLI->getLMULCost( + LT.second) + // FIXME: vmv.x.s is the same as extractelement + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM, + RISCV::VMV_V_X, RISCV::VMSNE_VI}, + LT.second, CostKind)); } if (HasScalar) { // Example sequence: // vmv.v.x v8, a0 - return LT.first * TLI->getLMULCost(LT.second); + return LT.first * + getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind); } // Example sequence: // vrgather.vi v9, v8, 0 - return LT.first * TLI->getVRGatherVICost(LT.second); + return LT.first * + getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind); } - case TTI::SK_Splice: + case TTI::SK_Splice: { // vslidedown+vslideup. // TODO: Multiplying by LT.first implies this legalizes into multiple copies // of similar code, but I think we expand through memory. 
- return 2 * LT.first * TLI->getVSlideCost(LT.second); + unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX}; + if (Index >= 0 && Index < 32) + Opcodes[0] = RISCV::VSLIDEDOWN_VI; + else if (Index < 0 && Index > -32) + Opcodes[1] = RISCV::VSLIDEUP_VI; + return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind); + } case TTI::SK_Reverse: { // TODO: Cases to improve here: // * Illegal vector types @@ -437,7 +523,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, if (LT.second.isFixedLengthVector()) // vrsub.vi has a 5 bit immediate field, otherwise an li suffices LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1; - InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second); + // FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX} + InstructionCost GatherCost = + 2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind); // Mask operation additionally required extend and truncate InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0; return LT.first * (LenCost + GatherCost + ExtendCost); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 4c955744b37d..7e5dbddb5b51 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -48,6 +48,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> { /// actual target hardware. unsigned getEstimatedVLFor(VectorType *Ty); + InstructionCost getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT, + TTI::TargetCostKind CostKind); + /// Return the cost of accessing a constant pool entry of the specified /// type. InstructionCost getConstantPoolLoadCost(Type *Ty, diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 629db8e2eb4d..0a8b5499a1fc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -211,8 +211,8 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx, MDString *MDKernelArgType = getKernelArgAttribute(F, ArgIdx, "kernel_arg_type"); - if (!MDKernelArgType || (MDKernelArgType->getString().ends_with("*") && - MDKernelArgType->getString().ends_with("_t"))) + if (!MDKernelArgType || (!MDKernelArgType->getString().ends_with("*") && + !MDKernelArgType->getString().ends_with("_t"))) return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual); if (MDKernelArgType->getString().ends_with("*")) @@ -438,7 +438,8 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, assert(Arg.Regs.size() == 1 && "Call arg has multiple VRegs"); ArgVRegs.push_back(Arg.Regs[0]); SPIRVType *SPIRVTy = GR->getOrCreateSPIRVType(Arg.Ty, MIRBuilder); - GR->assignSPIRVTypeToVReg(SPIRVTy, Arg.Regs[0], MIRBuilder.getMF()); + if (!GR->getSPIRVTypeForVReg(Arg.Regs[0])) + GR->assignSPIRVTypeToVReg(SPIRVTy, Arg.Regs[0], MIRBuilder.getMF()); } if (auto Res = SPIRV::lowerBuiltin( DemangledName, SPIRV::InstructionSet::OpenCL_std, MIRBuilder, diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 660c574daf38..fb4e9932dd2d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -74,6 +74,7 @@ class SPIRVEmitIntrinsics void processInstrAfterVisit(Instruction *I); void insertAssignPtrTypeIntrs(Instruction *I); void insertAssignTypeIntrs(Instruction *I); + void 
insertPtrCastInstr(Instruction *I); void processGlobalValue(GlobalVariable &GV); public: @@ -255,7 +256,19 @@ Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) { } Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) { - SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()}; + Value *Source = I.getOperand(0); + + // SPIR-V, contrary to LLVM 17+ IR, supports bitcasts between pointers of + // varying element types. In case of IR coming from older versions of LLVM + // such bitcasts do not provide sufficient information, should be just skipped + // here, and handled in insertPtrCastInstr. + if (I.getType()->isPointerTy()) { + I.replaceAllUsesWith(Source); + I.eraseFromParent(); + return nullptr; + } + + SmallVector<Type *, 2> Types = {I.getType(), Source->getType()}; SmallVector<Value *> Args(I.op_begin(), I.op_end()); auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_bitcast, {Types}, {Args}); std::string InstName = I.hasName() ? I.getName().str() : ""; @@ -265,6 +278,111 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) { return NewI; } +void SPIRVEmitIntrinsics::insertPtrCastInstr(Instruction *I) { + Value *Pointer; + Type *ExpectedElementType; + unsigned OperandToReplace; + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + Pointer = SI->getPointerOperand(); + ExpectedElementType = SI->getValueOperand()->getType(); + OperandToReplace = 1; + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + Pointer = LI->getPointerOperand(); + ExpectedElementType = LI->getType(); + OperandToReplace = 0; + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { + Pointer = GEPI->getPointerOperand(); + ExpectedElementType = GEPI->getSourceElementType(); + OperandToReplace = 0; + } else { + return; + } + + // If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source + // pointer instead. The BitCastInst should be later removed when visited. + while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer)) + Pointer = BC->getOperand(0); + + // Do not emit spv_ptrcast if Pointer is a GlobalValue of expected type. + GlobalValue *GV = dyn_cast<GlobalValue>(Pointer); + if (GV && GV->getValueType() == ExpectedElementType) + return; + + // Do not emit spv_ptrcast if Pointer is a result of alloca with expected + // type. + AllocaInst *A = dyn_cast<AllocaInst>(Pointer); + if (A && A->getAllocatedType() == ExpectedElementType) + return; + + if (dyn_cast<GetElementPtrInst>(Pointer)) + return; + + setInsertPointSkippingPhis(*IRB, I); + Constant *ExpectedElementTypeConst = + Constant::getNullValue(ExpectedElementType); + ConstantAsMetadata *CM = + ValueAsMetadata::getConstant(ExpectedElementTypeConst); + MDTuple *TyMD = MDNode::get(F->getContext(), CM); + MetadataAsValue *VMD = MetadataAsValue::get(F->getContext(), TyMD); + unsigned AddressSpace = Pointer->getType()->getPointerAddressSpace(); + bool FirstPtrCastOrAssignPtrType = true; + + // Do not emit new spv_ptrcast if equivalent one already exists or when + // spv_assign_ptr_type already targets this pointer with the same element + // type. + for (auto User : Pointer->users()) { + auto *II = dyn_cast<IntrinsicInst>(User); + if (!II || + (II->getIntrinsicID() != Intrinsic::spv_assign_ptr_type && + II->getIntrinsicID() != Intrinsic::spv_ptrcast) || + II->getOperand(0) != Pointer) + continue; + + // There is some spv_ptrcast/spv_assign_ptr_type already targeting this + // pointer. 
+ FirstPtrCastOrAssignPtrType = false; + if (II->getOperand(1) != VMD || + dyn_cast<ConstantInt>(II->getOperand(2))->getSExtValue() != + AddressSpace) + continue; + + // The spv_ptrcast/spv_assign_ptr_type targeting this pointer is of the same + // element type and address space. + if (II->getIntrinsicID() != Intrinsic::spv_ptrcast) + return; + + // This must be a spv_ptrcast, do not emit new if this one has the same BB + // as I. Otherwise, search for other spv_ptrcast/spv_assign_ptr_type. + if (II->getParent() != I->getParent()) + continue; + + I->setOperand(OperandToReplace, II); + return; + } + + // Do not emit spv_ptrcast if it would cast to the default pointer element + // type (i8) of the same address space. + if (ExpectedElementType->isIntegerTy(8)) + return; + + // If this would be the first spv_ptrcast and there is no spv_assign_ptr_type + // for this pointer before, do not emit spv_ptrcast but emit + // spv_assign_ptr_type instead. + if (FirstPtrCastOrAssignPtrType && isa<Instruction>(Pointer)) { + buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {Pointer->getType()}, + ExpectedElementTypeConst, Pointer, + {IRB->getInt32(AddressSpace)}); + return; + } else { + SmallVector<Type *, 2> Types = {Pointer->getType(), Pointer->getType()}; + SmallVector<Value *, 2> Args = {Pointer, VMD, IRB->getInt32(AddressSpace)}; + auto *PtrCastI = + IRB->CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args); + I->setOperand(OperandToReplace, PtrCastI); + return; + } +} + Instruction *SPIRVEmitIntrinsics::visitInsertElementInst(InsertElementInst &I) { SmallVector<Type *, 4> Types = {I.getType(), I.getOperand(0)->getType(), I.getOperand(1)->getType(), @@ -522,13 +640,18 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { for (auto &I : Worklist) { insertAssignPtrTypeIntrs(I); insertAssignTypeIntrs(I); + insertPtrCastInstr(I); } for (auto *I : Worklist) { TrackConstants = true; if (!I->getType()->isVoidTy() || isa<StoreInst>(I)) IRB->SetInsertPoint(I->getNextNode()); + // Visitors return either the original/newly created instruction for further + // processing, nullptr otherwise. 
I = visit(*I); + if (!I) + continue; processInstrAfterVisit(I); } return true; diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index faaf7f0e2548..061bc9674237 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -289,8 +289,9 @@ static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType, return ConvReg; } -bool SPIRVLegalizerInfo::legalizeCustom(LegalizerHelper &Helper, - MachineInstr &MI) const { +bool SPIRVLegalizerInfo::legalizeCustom( + LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const { auto Opc = MI.getOpcode(); MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); if (!isTypeFoldingSupported(Opc)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h index 2541ff29edb0..f18b15b7f169 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.h @@ -29,7 +29,8 @@ class SPIRVLegalizerInfo : public LegalizerInfo { SPIRVGlobalRegistry *GR; public: - bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI) const override; + bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, + LostDebugLocObserver &LocObserver) const override; SPIRVLegalizerInfo(const SPIRVSubtarget &ST); }; } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 1bfce70fedc0..cbc16fa98661 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -125,12 +125,32 @@ static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, SmallVector<MachineInstr *, 10> ToErase; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - if (!isSpvIntrinsic(MI, Intrinsic::spv_bitcast)) + if (!isSpvIntrinsic(MI, Intrinsic::spv_bitcast) && + !isSpvIntrinsic(MI, Intrinsic::spv_ptrcast)) continue; assert(MI.getOperand(2).isReg()); MIB.setInsertPt(*MI.getParent(), MI); - MIB.buildBitcast(MI.getOperand(0).getReg(), MI.getOperand(2).getReg()); ToErase.push_back(&MI); + if (isSpvIntrinsic(MI, Intrinsic::spv_bitcast)) { + MIB.buildBitcast(MI.getOperand(0).getReg(), MI.getOperand(2).getReg()); + continue; + } + Register Def = MI.getOperand(0).getReg(); + Register Source = MI.getOperand(2).getReg(); + SPIRVType *BaseTy = GR->getOrCreateSPIRVType( + getMDOperandAsType(MI.getOperand(3).getMetadata(), 0), MIB); + SPIRVType *AssignedPtrType = GR->getOrCreateSPIRVPointerType( + BaseTy, MI, *MF.getSubtarget<SPIRVSubtarget>().getInstrInfo(), + addressSpaceToStorageClass(MI.getOperand(4).getImm())); + + // If the bitcast would be redundant, replace all uses with the source + // register. + if (GR->getSPIRVTypeForVReg(Source) == AssignedPtrType) { + MIB.getMRI()->replaceRegWith(Def, Source); + } else { + GR->assignSPIRVTypeToVReg(AssignedPtrType, Def, MF); + MIB.buildBitcast(Def, Source); + } } } for (MachineInstr *MI : ToErase) @@ -587,6 +607,40 @@ static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR, } } +static bool isImplicitFallthrough(MachineBasicBlock &MBB) { + if (MBB.empty()) + return true; + + // Branching SPIR-V intrinsics are not detected by this generic method. + // Thus, we can only trust negative result. + if (!MBB.canFallThrough()) + return false; + + // Otherwise, we must manually check if we have a SPIR-V intrinsic which + // prevent an implicit fallthrough. 
+ for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); + It != E; ++It) { + if (isSpvIntrinsic(*It, Intrinsic::spv_switch)) + return false; + } + return true; +} + +static void removeImplicitFallthroughs(MachineFunction &MF, + MachineIRBuilder MIB) { + // It is valid for MachineBasicBlocks to not finish with a branch instruction. + // In such cases, they will simply fallthrough their immediate successor. + for (MachineBasicBlock &MBB : MF) { + if (!isImplicitFallthrough(MBB)) + continue; + + assert(std::distance(MBB.successors().begin(), MBB.successors().end()) == + 1); + MIB.setInsertPt(MBB, MBB.end()); + MIB.buildBr(**MBB.successors().begin()); + } +} + bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) { // Initialize the type registry. const SPIRVSubtarget &ST = MF.getSubtarget<SPIRVSubtarget>(); @@ -599,6 +653,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) { generateAssignInstrs(MF, GR, MIB); processSwitches(MF, GR, MIB); processInstrsWithTypeFolding(MF, GR, MIB); + removeImplicitFallthroughs(MF, MIB); return true; } diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 1503f263e42c..62d9090d289f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils.h" #include <optional> using namespace llvm; @@ -151,6 +152,19 @@ TargetPassConfig *SPIRVTargetMachine::createPassConfig(PassManagerBase &PM) { } void SPIRVPassConfig::addIRPasses() { + if (TM.getSubtargetImpl()->isVulkanEnv()) { + // Once legalized, we need to structurize the CFG to follow the spec. + // This is done through the following 8 steps. + // TODO(#75801): add the remaining steps. + + // 1. Simplify loop for subsequent transformations. After this steps, loops + // have the following properties: + // - loops have a single entry edge (pre-header to loop header). + // - all loop exits are dominated by the loop pre-header. + // - loops have a single back-edge. + addPass(createLoopSimplifyPass()); + } + TargetPassConfig::addIRPasses(); addPass(createSPIRVRegularizerPass()); addPass(createSPIRVPrepareFunctionsPass(TM)); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 1c0e8d84e2fd..d4f7d8e89af5 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -228,8 +228,8 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) { return MI->getOperand(1).getCImm()->getValue().getZExtValue(); } -bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) { - if (auto *GI = dyn_cast<GIntrinsic>(&MI)) +bool isSpvIntrinsic(const MachineInstr &MI, Intrinsic::ID IntrinsicID) { + if (const auto *GI = dyn_cast<GIntrinsic>(&MI)) return GI->is(IntrinsicID); return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 30fae6c7de47..60742e2f2728 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -79,7 +79,7 @@ MachineInstr *getDefInstrMaybeConstant(Register &ConstReg, uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI); // Check if MI is a SPIR-V specific intrinsic call. 
-bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID); +bool isSpvIntrinsic(const MachineInstr &MI, Intrinsic::ID IntrinsicID); // Get type of i-th operand of the metadata node. Type *getMDOperandAsType(const MDNode *N, unsigned I); diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index c7d8591c5bdf..320f91c76057 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1641,7 +1641,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { // If this is a 64-bit constant that is out of the range of LLILF, // LLIHF and LGFI, split it into two 32-bit pieces. if (Node->getValueType(0) == MVT::i64) { - uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue(); + uint64_t Val = Node->getAsZExtVal(); if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) { splitLargeImmediate(ISD::OR, Node, SDValue(), Val - uint32_t(Val), uint32_t(Val)); @@ -1677,10 +1677,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) { SDValue CCValid = Node->getOperand(2); SDValue CCMask = Node->getOperand(3); - uint64_t ConstCCValid = - cast<ConstantSDNode>(CCValid.getNode())->getZExtValue(); - uint64_t ConstCCMask = - cast<ConstantSDNode>(CCMask.getNode())->getZExtValue(); + uint64_t ConstCCValid = CCValid.getNode()->getAsZExtVal(); + uint64_t ConstCCMask = CCMask.getNode()->getAsZExtVal(); // Invert the condition. CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node), CCMask.getValueType()); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 045c4c0aac07..2450c6801a66 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2662,10 +2662,8 @@ static void adjustForFNeg(Comparison &C) { // with (sext (trunc X)) into a comparison with (shl X, 32). static void adjustForLTGFR(Comparison &C) { // Check for a comparison between (shl X, 32) and 0. 
- if (C.Op0.getOpcode() == ISD::SHL && - C.Op0.getValueType() == MVT::i64 && - C.Op1.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { + if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && + C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); if (C1 && C1->getZExtValue() == 32) { SDValue ShlOp0 = C.Op0.getOperand(0); @@ -2690,7 +2688,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, C.Op0.getOperand(0).getOpcode() == ISD::LOAD && C.Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && - cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { + C.Op1->getAsZExtVal() == 0) { auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <= C.Op0.getValueSizeInBits().getFixedValue()) { @@ -3035,12 +3033,12 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, - cast<ConstantSDNode>(CmpOp1)->getZExtValue(), Cond); + CmpOp1->getAsZExtVal(), Cond); if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, - cast<ConstantSDNode>(CmpOp1)->getZExtValue(), Cond); + CmpOp1->getAsZExtVal(), Cond); } Comparison C(CmpOp0, CmpOp1, Chain); C.CCMask = CCMaskForCondCode(Cond); @@ -3457,12 +3455,11 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, // Check for absolute and negative-absolute selections, including those // where the comparison value is sign-extended (for LPGFR and LNGFR). // This check supplements the one in DAGCombiner. - if (C.Opcode == SystemZISD::ICMP && - C.CCMask != SystemZ::CCMASK_CMP_EQ && + if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && C.CCMask != SystemZ::CCMASK_CMP_NE && C.Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && - cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { + C.Op1->getAsZExtVal() == 0) { if (isAbsolute(C.Op0, TrueOp, FalseOp)) return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); if (isAbsolute(C.Op0, FalseOp, TrueOp)) @@ -3947,8 +3944,7 @@ SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, // If user has set the no alignment function attribute, ignore // alloca alignments. - uint64_t AlignVal = - (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0); + uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); uint64_t StackAlign = TFI->getStackAlignment(); uint64_t RequiredAlign = std::max(AlignVal, StackAlign); @@ -4013,8 +4009,7 @@ SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, // If user has set the no alignment function attribute, ignore // alloca alignments. - uint64_t AlignVal = - (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0); + uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); uint64_t StackAlign = TFI->getStackAlignment(); uint64_t RequiredAlign = std::max(AlignVal, StackAlign); @@ -4213,7 +4208,7 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { // If the low part is a constant that is outside the range of LHI, // then we're better off using IILF. 
if (LowOp.getOpcode() == ISD::Constant) { - int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue()); + int64_t Value = int32_t(LowOp->getAsZExtVal()); if (!isInt<16>(Value)) return Op; } @@ -5897,7 +5892,7 @@ SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, Op1.getOpcode() != ISD::BITCAST && Op1.getOpcode() != ISD::ConstantFP && Op2.getOpcode() == ISD::Constant) { - uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue(); + uint64_t Index = Op2->getAsZExtVal(); unsigned Mask = VT.getVectorNumElements() - 1; if (Index <= Mask) return Op; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 37abbb072cdd..15dc44a04395 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -278,7 +278,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = GTI.getSequentialElementStride(DL); for (;;) { if (const auto *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4bcf89690505..7c47790d1e35 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1869,8 +1869,7 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, Ops[OpIdx++] = Op.getOperand(2); while (OpIdx < 18) { const SDValue &MaskIdx = Op.getOperand(OpIdx + 1); - if (MaskIdx.isUndef() || - cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) { + if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) { bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant; Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget); } else { @@ -1912,7 +1911,7 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, const SDNode *Index = Extract.getOperand(1).getNode(); if (!isa<ConstantSDNode>(Index)) return SDValue(); - unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue(); + unsigned IndexVal = Index->getAsZExtVal(); unsigned Scale = ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); assert(Scale > 1); @@ -2335,7 +2334,7 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op, SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode(); if (isa<ConstantSDNode>(IdxNode)) { // Ensure the index type is i32 to match the tablegen patterns - uint64_t Idx = cast<ConstantSDNode>(IdxNode)->getZExtValue(); + uint64_t Idx = IdxNode->getAsZExtVal(); SmallVector<SDValue, 3> Ops(Op.getNode()->ops()); Ops[Op.getNumOperands() - 1] = DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32); diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h index 1f69feceae27..12134f7b00f1 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h @@ -21,7 +21,6 @@ namespace llvm { class X86Subtarget; class X86TargetMachine; -/// This class provides the information for the target register banks. 
class X86LegalizerInfo : public LegalizerInfo { private: /// Keep a reference to the X86Subtarget around so that we can diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index e006dd877360..304b998e1f26 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -148,25 +148,21 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::AND16ri8: case X86::AND16rm: case X86::AND16rr: - case X86::AND16rr_REV: case X86::AND32i32: case X86::AND32ri: case X86::AND32ri8: case X86::AND32rm: case X86::AND32rr: - case X86::AND32rr_REV: case X86::AND64i32: case X86::AND64ri32: case X86::AND64ri8: case X86::AND64rm: case X86::AND64rr: - case X86::AND64rr_REV: case X86::AND8i8: case X86::AND8ri: case X86::AND8ri8: case X86::AND8rm: case X86::AND8rr: - case X86::AND8rr_REV: return FirstMacroFusionInstKind::And; // CMP case X86::CMP16i16: @@ -175,28 +171,24 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::CMP16ri8: case X86::CMP16rm: case X86::CMP16rr: - case X86::CMP16rr_REV: case X86::CMP32i32: case X86::CMP32mr: case X86::CMP32ri: case X86::CMP32ri8: case X86::CMP32rm: case X86::CMP32rr: - case X86::CMP32rr_REV: case X86::CMP64i32: case X86::CMP64mr: case X86::CMP64ri32: case X86::CMP64ri8: case X86::CMP64rm: case X86::CMP64rr: - case X86::CMP64rr_REV: case X86::CMP8i8: case X86::CMP8mr: case X86::CMP8ri: case X86::CMP8ri8: case X86::CMP8rm: case X86::CMP8rr: - case X86::CMP8rr_REV: return FirstMacroFusionInstKind::Cmp; // ADD case X86::ADD16i16: @@ -204,50 +196,42 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::ADD16ri8: case X86::ADD16rm: case X86::ADD16rr: - case X86::ADD16rr_REV: case X86::ADD32i32: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD32rm: case X86::ADD32rr: - case X86::ADD32rr_REV: case X86::ADD64i32: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD64rm: case X86::ADD64rr: - case X86::ADD64rr_REV: case X86::ADD8i8: case X86::ADD8ri: case X86::ADD8ri8: case X86::ADD8rm: case X86::ADD8rr: - case X86::ADD8rr_REV: // SUB case X86::SUB16i16: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB16rm: case X86::SUB16rr: - case X86::SUB16rr_REV: case X86::SUB32i32: case X86::SUB32ri: case X86::SUB32ri8: case X86::SUB32rm: case X86::SUB32rr: - case X86::SUB32rr_REV: case X86::SUB64i32: case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB64rm: case X86::SUB64rr: - case X86::SUB64rr_REV: case X86::SUB8i8: case X86::SUB8ri: case X86::SUB8ri8: case X86::SUB8rm: case X86::SUB8rr: - case X86::SUB8rr_REV: return FirstMacroFusionInstKind::AddSub; // INC case X86::INC16r: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 924956295e7c..f7c361393fea 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1650,6 +1650,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) ++SrcRegNum; + if (IsND) // Skip new data destination + ++CurOp; + emitRegModRMByte(MI.getOperand(SrcRegNum), getX86RegNum(MI.getOperand(CurOp)), CB); CurOp = SrcRegNum + 1; diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 485afbc1dfbc..21623a805f55 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -131,9 +131,9 @@ FunctionPass *createX86FixupBWInsts(); /// to another, when profitable. 
FunctionPass *createX86DomainReassignmentPass(); -/// This pass replaces EVEX encoded of AVX-512 instructiosn by VEX -/// encoding when possible in order to reduce code size. -FunctionPass *createX86EvexToVexInsts(); +/// This pass compress instructions from EVEX space to legacy/VEX/EVEX space when +/// possible in order to reduce code size or facilitate HW decoding. +FunctionPass *createX86CompressEVEXPass(); /// This pass creates the thunks for the retpoline feature. FunctionPass *createX86IndirectThunksPass(); @@ -167,7 +167,7 @@ FunctionPass *createX86SpeculativeLoadHardeningPass(); FunctionPass *createX86SpeculativeExecutionSideEffectSuppression(); FunctionPass *createX86ArgumentStackSlotPass(); -void initializeEvexToVexInstPassPass(PassRegistry &); +void initializeCompressEVEXPassPass(PassRegistry &); void initializeFPSPass(PassRegistry &); void initializeFixupBWInstPassPass(PassRegistry &); void initializeFixupLEAPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86EvexToVex.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp index c425c37b4186..b95baddd9dea 100644 --- a/llvm/lib/Target/X86/X86EvexToVex.cpp +++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp @@ -1,5 +1,4 @@ -//===- X86EvexToVex.cpp ---------------------------------------------------===// -// Compress EVEX instructions to VEX encoding when possible to reduce code size +//===- X86CompressEVEX.cpp ------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,17 +6,30 @@ // //===----------------------------------------------------------------------===// // -/// \file -/// This file defines the pass that goes over all AVX-512 instructions which -/// are encoded using the EVEX prefix and if possible replaces them by their -/// corresponding VEX encoding which is usually shorter by 2 bytes. -/// EVEX instructions may be encoded via the VEX prefix when the AVX-512 -/// instruction has a corresponding AVX/AVX2 opcode, when vector length -/// accessed by instruction is less than 512 bits and when it does not use -// the xmm or the mask registers or xmm/ymm registers with indexes higher -// than 15. -/// The pass applies code reduction on the generated code for AVX-512 instrs. +// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space +// when possible in order to reduce code size or facilitate HW decoding. // +// Possible compression: +// a. AVX512 instruction (EVEX) -> AVX instruction (VEX) +// b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX) +// c. NDD (EVEX) -> non-NDD (legacy) +// d. NF_ND (EVEX) -> NF (EVEX) +// +// Compression a, b and c can always reduce code size, with some exceptions +// such as promoted 16-bit CRC32 which is as long as the legacy version. +// +// legacy: +// crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] +// promoted: +// crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +// +// From performance perspective, these should be same (same uops and same EXE +// ports). From a FMV perspective, an older legacy encoding is preferred b/c it +// can execute in more places (broader HW install base). So we will still do +// the compression. +// +// Compression d can help hardware decode (HW may skip reading the NDD +// register) although the instruction length remains unchanged. 
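The compression itself, visible in the CompressEVEXImpl hunk further down, is a table lookup: entries are keyed by the original EVEX opcode, the table is kept sorted, and llvm::lower_bound locates the replacement opcode. A stripped-down sketch of that lookup shape is below; the struct mirrors the X86CompressEVEXTableEntry from the hunk, while the lookup function, its name, and the caller-supplied table are illustrative stand-ins for the generated X86GenCompressEVEXTables.inc data:

#include <algorithm>
#include <cstdint>
#include <vector>

struct CompressTableEntry {
  uint16_t OldOpc;
  uint16_t NewOpc;
  // Keeps the table sortable by the original opcode.
  bool operator<(const CompressTableEntry &RHS) const {
    return OldOpc < RHS.OldOpc;
  }
  // Heterogeneous comparison so lower_bound can search by a bare opcode.
  friend bool operator<(const CompressTableEntry &TE, unsigned Opc) {
    return TE.OldOpc < Opc;
  }
};

// Returns 0 when no compressed form exists for Opc; Table must be sorted.
static uint16_t lookupNewOpcode(const std::vector<CompressTableEntry> &Table,
                                unsigned Opc) {
  auto I = std::lower_bound(Table.begin(), Table.end(), Opc);
  if (I == Table.end() || I->OldOpc != Opc)
    return 0;
  return I->NewOpc;
}

In the pass itself, a successful lookup is followed by MI.setDesc() with the new opcode and an asm-printer flag (AC_EVEX_2_VEX or AC_EVEX_2_LEGACY, per the encoding of the new descriptor), so the emitted assembly comments record which compression fired.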
//===----------------------------------------------------------------------===// #include "MCTargetDesc/X86BaseInfo.h" @@ -38,37 +50,34 @@ using namespace llvm; -// Including the generated EVEX2VEX tables. -struct X86EvexToVexCompressTableEntry { - uint16_t EvexOpc; - uint16_t VexOpc; +// Including the generated EVEX compression tables. +struct X86CompressEVEXTableEntry { + uint16_t OldOpc; + uint16_t NewOpc; - bool operator<(const X86EvexToVexCompressTableEntry &RHS) const { - return EvexOpc < RHS.EvexOpc; + bool operator<(const X86CompressEVEXTableEntry &RHS) const { + return OldOpc < RHS.OldOpc; } - friend bool operator<(const X86EvexToVexCompressTableEntry &TE, - unsigned Opc) { - return TE.EvexOpc < Opc; + friend bool operator<(const X86CompressEVEXTableEntry &TE, unsigned Opc) { + return TE.OldOpc < Opc; } }; -#include "X86GenEVEX2VEXTables.inc" +#include "X86GenCompressEVEXTables.inc" -#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible" -#define EVEX2VEX_NAME "x86-evex-to-vex-compress" +#define COMP_EVEX_DESC "Compressing EVEX instrs when possible" +#define COMP_EVEX_NAME "x86-compress-evex" -#define DEBUG_TYPE EVEX2VEX_NAME +#define DEBUG_TYPE COMP_EVEX_NAME namespace { -class EvexToVexInstPass : public MachineFunctionPass { +class CompressEVEXPass : public MachineFunctionPass { public: static char ID; - EvexToVexInstPass() : MachineFunctionPass(ID) {} - StringRef getPassName() const override { return EVEX2VEX_DESC; } + CompressEVEXPass() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { return COMP_EVEX_DESC; } - /// Loop over all of the basic blocks, replacing EVEX instructions - /// by equivalent VEX instructions when possible for reducing code size. bool runOnMachineFunction(MachineFunction &MF) override; // This pass runs after regalloc and doesn't support VReg operands. @@ -80,7 +89,7 @@ public: } // end anonymous namespace -char EvexToVexInstPass::ID = 0; +char CompressEVEXPass::ID = 0; static bool usesExtendedRegister(const MachineInstr &MI) { auto isHiRegIdx = [](unsigned Reg) { @@ -112,8 +121,8 @@ static bool usesExtendedRegister(const MachineInstr &MI) { return false; } -static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) { - switch (EvexOpc) { +static bool checkVEXInstPredicate(unsigned OldOpc, const X86Subtarget &ST) { + switch (OldOpc) { default: return true; case X86::VCVTNEPS2BF16Z128rm: @@ -151,15 +160,15 @@ static bool checkVEXInstPredicate(unsigned EvexOpc, const X86Subtarget &ST) { } // Do any custom cleanup needed to finalize the conversion. -static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { - (void)VexOpc; +static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) { + (void)NewOpc; unsigned Opc = MI.getOpcode(); switch (Opc) { case X86::VALIGNDZ128rri: case X86::VALIGNDZ128rmi: case X86::VALIGNQZ128rri: case X86::VALIGNQZ128rmi: { - assert((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) && + assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) && "Unexpected new opcode!"); unsigned Scale = (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 
8 : 4; @@ -175,8 +184,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { case X86::VSHUFI32X4Z256rri: case X86::VSHUFI64X2Z256rmi: case X86::VSHUFI64X2Z256rri: { - assert((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr || - VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) && + assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr || + NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) && "Unexpected new opcode!"); MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1); int64_t ImmVal = Imm.getImm(); @@ -200,7 +209,7 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { case X86::VRNDSCALESDZm_Int: case X86::VRNDSCALESSZr_Int: case X86::VRNDSCALESSZm_Int: - const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands()-1); + const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1); int64_t ImmVal = Imm.getImm(); // Ensure that only bits 3:0 of the immediate are used. if ((ImmVal & 0xf) != ImmVal) @@ -211,86 +220,77 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned VexOpc) { return true; } -// For EVEX instructions that can be encoded using VEX encoding -// replace them by the VEX encoding in order to reduce size. -static bool CompressEvexToVexImpl(MachineInstr &MI, const X86Subtarget &ST) { - // VEX format. - // # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1 - // [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM] - // - // EVEX format. - // # of bytes: 4 1 1 1 4 / 1 1 - // [Prefixes] EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate] - const MCInstrDesc &Desc = MI.getDesc(); +static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) { + uint64_t TSFlags = MI.getDesc().TSFlags; // Check for EVEX instructions only. - if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX) + if ((TSFlags & X86II::EncodingMask) != X86II::EVEX) return false; - // Check for EVEX instructions with mask or broadcast as in these cases - // the EVEX prefix is needed in order to carry this information - // thus preventing the transformation to VEX encoding. - if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B)) + // Instructions with mask or 512-bit vector can't be converted to VEX. + if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2)) return false; - // Check for EVEX instructions with L2 set. These instructions are 512-bits - // and can't be converted to VEX. - if (Desc.TSFlags & X86II::EVEX_L2) + // EVEX_B has several meanings. + // AVX512: + // register form: rounding control or SAE + // memory form: broadcast + // + // APX: + // MAP4: NDD + // + // For AVX512 cases, EVEX prefix is needed in order to carry this information + // thus preventing the transformation to VEX encoding. + if (TSFlags & X86II::EVEX_B) return false; - // Use the VEX.L bit to select the 128 or 256-bit table. - ArrayRef<X86EvexToVexCompressTableEntry> Table = - (Desc.TSFlags & X86II::VEX_L) ? 
ArrayRef(X86EvexToVex256CompressTable) - : ArrayRef(X86EvexToVex128CompressTable); + ArrayRef<X86CompressEVEXTableEntry> Table = ArrayRef(X86CompressEVEXTable); - unsigned EvexOpc = MI.getOpcode(); - const auto *I = llvm::lower_bound(Table, EvexOpc); - if (I == Table.end() || I->EvexOpc != EvexOpc) + unsigned Opc = MI.getOpcode(); + const auto *I = llvm::lower_bound(Table, Opc); + if (I == Table.end() || I->OldOpc != Opc) return false; - if (usesExtendedRegister(MI)) - return false; - if (!checkVEXInstPredicate(EvexOpc, ST)) - return false; - if (!performCustomAdjustments(MI, I->VexOpc)) + if (usesExtendedRegister(MI) || !checkVEXInstPredicate(Opc, ST) || + !performCustomAdjustments(MI, I->NewOpc)) return false; - MI.setDesc(ST.getInstrInfo()->get(I->VexOpc)); - MI.setAsmPrinterFlag(X86::AC_EVEX_2_VEX); + const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(I->NewOpc); + MI.setDesc(NewDesc); + uint64_t Encoding = NewDesc.TSFlags & X86II::EncodingMask; + auto AsmComment = + (Encoding == X86II::VEX) ? X86::AC_EVEX_2_VEX : X86::AC_EVEX_2_LEGACY; + MI.setAsmPrinterFlag(AsmComment); return true; } -bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) { +bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) { #ifndef NDEBUG // Make sure the tables are sorted. static std::atomic<bool> TableChecked(false); if (!TableChecked.load(std::memory_order_relaxed)) { - assert(llvm::is_sorted(X86EvexToVex128CompressTable) && - "X86EvexToVex128CompressTable is not sorted!"); - assert(llvm::is_sorted(X86EvexToVex256CompressTable) && - "X86EvexToVex256CompressTable is not sorted!"); + assert(llvm::is_sorted(X86CompressEVEXTable) && + "X86CompressEVEXTable is not sorted!"); TableChecked.store(true, std::memory_order_relaxed); } #endif const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); - if (!ST.hasAVX512()) + if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD()) return false; bool Changed = false; - /// Go over all basic blocks in function and replace - /// EVEX encoded instrs by VEX encoding when possible. for (MachineBasicBlock &MBB : MF) { // Traverse the basic block. for (MachineInstr &MI : MBB) - Changed |= CompressEvexToVexImpl(MI, ST); + Changed |= CompressEVEXImpl(MI, ST); } return Changed; } -INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false) +INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false) -FunctionPass *llvm::createX86EvexToVexInsts() { - return new EvexToVexInstPass(); +FunctionPass *llvm::createX86CompressEVEXPass() { + return new CompressEVEXPass(); } diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index bdd86e48fa54..20dbaf797e32 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -619,40 +619,30 @@ void X86DomainReassignment::initConverters() { std::make_unique<InstrReplacerDstCOPY>(From, To); }; - bool HasEGPR = STI->hasEGPR(); - createReplacerDstCOPY(X86::MOVZX32rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); - createReplacerDstCOPY(X86::MOVZX64rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); +#define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC + createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); - createReplacerDstCOPY(X86::MOVZX32rr16, - HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); - createReplacerDstCOPY(X86::MOVZX64rr16, - HasEGPR ? 
X86::KMOVWkk_EVEX : X86::KMOVWkk); + createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); + createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); if (STI->hasDQI()) { - createReplacerDstCOPY(X86::MOVZX16rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacerDstCOPY(X86::MOVZX32rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacerDstCOPY(X86::MOVZX64rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - - createReplacerDstCOPY(X86::MOVZX16rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); - createReplacerDstCOPY(X86::MOVZX32rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); - createReplacerDstCOPY(X86::MOVZX64rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + + createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); } auto createReplacer = [&](unsigned From, unsigned To) { Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To); }; - createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); - createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk); - createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); + createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); + createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); createReplacer(X86::SHR16ri, X86::KSHIFTRWri); createReplacer(X86::SHL16ri, X86::KSHIFTLWri); createReplacer(X86::NOT16r, X86::KNOTWrr); @@ -661,14 +651,14 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::XOR16rr, X86::KXORWrr); if (STI->hasBWI()) { - createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm); - createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm); + createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); + createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); - createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk); - createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk); + createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); + createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); - createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk); - createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk); + createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); + createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); createReplacer(X86::SHR32ri, X86::KSHIFTRDri); createReplacer(X86::SHR64ri, X86::KSHIFTRQri); @@ -696,8 +686,8 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. - //createReplacer(X86::TEST32rr, X86::KTESTDrr); - //createReplacer(X86::TEST64rr, X86::KTESTQrr); + // createReplacer(X86::TEST32rr, X86::KTESTDrr); + // createReplacer(X86::TEST64rr, X86::KTESTQrr); } if (STI->hasDQI()) { @@ -706,9 +696,9 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::AND8rr, X86::KANDBrr); - createReplacer(X86::MOV8rm, HasEGPR ? 
X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk); - createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); + createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); createReplacer(X86::NOT8r, X86::KNOTBrr); @@ -719,11 +709,12 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. - //createReplacer(X86::TEST8rr, X86::KTESTBrr); - //createReplacer(X86::TEST16rr, X86::KTESTWrr); + // createReplacer(X86::TEST8rr, X86::KTESTBrr); + // createReplacer(X86::TEST16rr, X86::KTESTWrr); createReplacer(X86::XOR8rr, X86::KXORBrr); } +#undef GET_EGPR_IF_ENABLED } bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 0ba31e173a1a..1ce1e6f6a563 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -916,7 +916,7 @@ redo_gep: // A array/variable index is always of the form i*S where S is the // constant scale size. See if we can push the scale into immediates. - uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = GTI.getSequentialElementStride(DL); for (;;) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. @@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic."); +#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC case Intrinsic::x86_sse42_crc32_32_8: - Opc = X86::CRC32r32r8; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_16: - Opc = X86::CRC32r32r16; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_32: - Opc = X86::CRC32r32r32; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_64_64: - Opc = X86::CRC32r64r64; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64); RC = &X86::GR64RegClass; break; +#undef GET_EGPR_IF_ENABLED } const Value *LHS = II->getArgOperand(0); diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index b13bf361ab79..aad839b83ee1 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -173,7 +173,6 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \ - LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \ case X86::MNEMONIC##8ri: \ diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 73b10cf3067e..53ce720be2da 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2852,7 +2852,7 @@ bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; - AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue(); + AM.Scale = ScaleOp->getAsZExtVal(); // Attempt to match index patterns, as long as we're not 
relying on implicit // sign-extension, which is performed BEFORE scale. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1e4b1361f98a..5a28240ea9e2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7371,7 +7371,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, /// index. static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, SDValue ExtIdx) { - int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); + int Idx = ExtIdx->getAsZExtVal(); if (!isa<ShuffleVectorSDNode>(ExtractedFromVec)) return Idx; @@ -7475,10 +7475,12 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { static SDValue LowerBUILD_VECTORvXbf16(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); - MVT IVT = VT.changeVectorElementTypeToInteger(); + MVT IVT = + VT.changeVectorElementType(Subtarget.hasFP16() ? MVT::f16 : MVT::i16); SmallVector<SDValue, 16> NewOps; for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) - NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I))); + NewOps.push_back(DAG.getBitcast(Subtarget.hasFP16() ? MVT::f16 : MVT::i16, + Op.getOperand(I))); SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps); return DAG.getBitcast(VT, Res); } @@ -8793,7 +8795,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF); SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI); - unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue(); + unsigned InsertC = InsIndex->getAsZExtVal(); unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits(); if (InsertC < NumEltsInLow128Bits) return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex); @@ -14369,6 +14371,13 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + if (VT == MVT::v8bf16) { + V1 = DAG.getBitcast(MVT::v8i16, V1); + V2 = DAG.getBitcast(MVT::v8i16, V2); + return DAG.getBitcast(VT, + DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, Mask)); + } + switch (VT.SimpleTy) { case MVT::v2i64: return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); @@ -17096,14 +17105,14 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false); } - if (VT == MVT::v32f16) { + if (VT == MVT::v32f16 || VT == MVT::v32bf16) { if (!Subtarget.hasBWI()) return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false); V1 = DAG.getBitcast(MVT::v32i16, V1); V2 = DAG.getBitcast(MVT::v32i16, V2); - return DAG.getBitcast(MVT::v32f16, + return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v32i16, DL, V1, V2, Mask)); } @@ -17747,7 +17756,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), Idx)); - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + unsigned IdxVal = Idx->getAsZExtVal(); SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, DAG.getTargetConstant(IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); @@ -21515,9 +21524,8 @@ SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op, RTLIB::Libcall LC = 
RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); SDValue Res = - makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; - return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, - DAG.getBitcast(MVT::i32, Res)); + makeLibCall(DAG, LC, MVT::f16, Op.getOperand(0), CallOptions, DL).first; + return DAG.getBitcast(MVT::i16, Res); } /// Depending on uarch and/or optimizing for size, we might prefer to use a @@ -24061,7 +24069,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 0 : -1 -> RES = ~setcc_carry if (Cond.getOpcode() == X86ISD::SUB) { - unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); + unsigned CondCode = CC->getAsZExtVal(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && @@ -25359,8 +25367,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (IntrData->Type == INTR_TYPE_3OP_IMM8 && Src3.getValueType() != MVT::i8) { - Src3 = DAG.getTargetConstant( - cast<ConstantSDNode>(Src3)->getZExtValue() & 0xff, dl, MVT::i8); + Src3 = DAG.getTargetConstant(Src3->getAsZExtVal() & 0xff, dl, MVT::i8); } // We specify 2 possible opcodes for intrinsics with rounding modes. @@ -25385,8 +25392,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant); SDValue Src4 = Op.getOperand(4); if (Src4.getValueType() != MVT::i8) { - Src4 = DAG.getTargetConstant( - cast<ConstantSDNode>(Src4)->getZExtValue() & 0xff, dl, MVT::i8); + Src4 = DAG.getTargetConstant(Src4->getAsZExtVal() & 0xff, dl, MVT::i8); } return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), @@ -26788,7 +26794,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, {Chain, Op1, Op2, Size}, VT, MMO); Chain = Res.getValue(1); Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT); - unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned Imm = Op2->getAsZExtVal(); if (Imm) Res = DAG.getNode(ISD::SHL, DL, VT, Res, DAG.getShiftAmountConstant(Imm, VT, DL)); @@ -40221,6 +40227,34 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } return SDValue(); } + case X86ISD::SHUF128: { + // If we're permuting the upper 256-bits subvectors of a concatenation, then + // see if we can peek through and access the subvector directly. + if (VT.is512BitVector()) { + // 512-bit mask uses 4 x i2 indices - if the msb is always set then only the + // upper subvector is used. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + uint64_t Mask = N->getConstantOperandVal(2); + SmallVector<SDValue> LHSOps, RHSOps; + SDValue NewLHS, NewRHS; + if ((Mask & 0x0A) == 0x0A && + collectConcatOps(LHS.getNode(), LHSOps, DAG) && LHSOps.size() == 2) { + NewLHS = widenSubVector(LHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0x0A; + } + if ((Mask & 0xA0) == 0xA0 && + collectConcatOps(RHS.getNode(), RHSOps, DAG) && RHSOps.size() == 2) { + NewRHS = widenSubVector(RHSOps[1], false, Subtarget, DAG, DL, 512); + Mask &= ~0xA0; + } + if (NewLHS || NewRHS) + return DAG.getNode(X86ISD::SHUF128, DL, VT, NewLHS ? NewLHS : LHS, + NewRHS ? NewRHS : RHS, + DAG.getTargetConstant(Mask, DL, MVT::i8)); + } + return SDValue(); + } case X86ISD::VPERM2X128: { // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)). 
SDValue LHS = N->getOperand(0); @@ -41320,6 +41354,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return TLO.CombineTo(Op, Src); break; } + case X86ISD::VZEXT_LOAD: { + // If upper demanded elements are not demanded then simplify to a + // scalar_to_vector(load()). + MVT SVT = VT.getSimpleVT().getVectorElementType(); + if (DemandedElts == 1 && Op.getValue(1).use_empty() && isTypeLegal(SVT)) { + SDLoc DL(Op); + auto *Mem = cast<MemSDNode>(Op); + SDValue Elt = TLO.DAG.getLoad(SVT, DL, Mem->getChain(), Mem->getBasePtr(), + Mem->getMemOperand()); + SDValue Vec = TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Elt); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Vec)); + } + break; + } case X86ISD::VBROADCAST: { SDValue Src = Op.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); @@ -41795,7 +41843,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); - unsigned ShAmt = cast<ConstantSDNode>(Op1)->getZExtValue(); + unsigned ShAmt = Op1->getAsZExtVal(); if (ShAmt >= BitWidth) break; @@ -42580,7 +42628,7 @@ static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { APInt Imm(SrcVT.getVectorNumElements(), 0); for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) { SDValue In = Op.getOperand(Idx); - if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1)) + if (!In.isUndef() && (In->getAsZExtVal() & 0x1)) Imm.setBit(Idx); } EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth()); @@ -49931,18 +49979,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, SDValue Ptr = Ld->getBasePtr(); SDValue Chain = Ld->getChain(); for (SDNode *User : Chain->uses()) { - if (User != N && + auto *UserLd = dyn_cast<MemSDNode>(User); + if (User != N && UserLd && (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD || User->getOpcode() == X86ISD::VBROADCAST_LOAD || ISD::isNormalLoad(User)) && - cast<MemSDNode>(User)->getChain() == Chain && - !User->hasAnyUseOfValue(1) && + UserLd->getChain() == Chain && !User->hasAnyUseOfValue(1) && User->getValueSizeInBits(0).getFixedValue() > RegVT.getFixedSizeInBits()) { if (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD && - cast<MemSDNode>(User)->getBasePtr() == Ptr && - cast<MemSDNode>(User)->getMemoryVT().getSizeInBits() == - MemVT.getSizeInBits()) { + UserLd->getBasePtr() == Ptr && + UserLd->getMemoryVT().getSizeInBits() == MemVT.getSizeInBits()) { SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits()); Extract = DAG.getBitcast(RegVT, Extract); @@ -49961,7 +50008,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, // See if we are loading a constant that matches in the lower // bits of a longer constant (but from a different constant pool ptr). 
EVT UserVT = User->getValueType(0); - SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr(); + SDValue UserPtr = UserLd->getBasePtr(); const Constant *LdC = getTargetConstantFromBasePtr(Ptr); const Constant *UserC = getTargetConstantFromBasePtr(UserPtr); if (LdC && UserC && UserPtr != Ptr) { @@ -53258,7 +53305,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, if (Index.getOpcode() == ISD::ADD && Index.getValueType().getVectorElementType() == PtrVT && isa<ConstantSDNode>(Scale)) { - uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue(); + uint64_t ScaleAmt = Scale->getAsZExtVal(); if (auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) { BitVector UndefElts; if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) { @@ -54572,6 +54619,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT()) return Op0.getOperand(0); } + + // concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x)) + if (Op0.getOpcode() == X86ISD::VPERMI && Subtarget.useAVX512Regs() && + !X86::mayFoldLoad(Op0.getOperand(0), Subtarget)) + return DAG.getNode(Op0.getOpcode(), DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, + Op0.getOperand(0), Op0.getOperand(0)), + Op0.getOperand(1)); } // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128. @@ -54979,6 +55034,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); } break; + case X86ISD::BLENDI: + if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) { + uint64_t Mask0 = Ops[0].getConstantOperandVal(2); + uint64_t Mask1 = Ops[1].getConstantOperandVal(2); + uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0; + MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements()); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue Sel = + DAG.getBitcast(MaskVT, DAG.getConstant(Mask, DL, MaskSVT)); + return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1), + ConcatSubOperand(VT, Ops, 0)); + } + break; case ISD::VSELECT: if (!IsSplat && Subtarget.hasAVX512() && (VT.is256BitVector() || @@ -57602,7 +57670,7 @@ X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const { } Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { - if (ML->isInnermost() && + if (ML && ML->isInnermost() && ExperimentalPrefInnermostLoopAlignment.getNumOccurrences()) return Align(1ULL << ExperimentalPrefInnermostLoopAlignment); return TargetLowering::getPrefLoopAlignment(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 9bd1622cb0d3..32745400a38b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1714,16 +1714,6 @@ namespace llvm { MachineBasicBlock *Entry, const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; - bool splitValueIntoRegisterParts( - SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) - const override; - - SDValue joinRegisterPartsIntoValue( - SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts, - unsigned NumParts, MVT PartVT, EVT ValueVT, - std::optional<CallingConv::ID> CC) const override; - bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp 
b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index b8b5421b9005..d75bd4171fde 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -127,6 +127,9 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, return getRegisterTypeForCallingConv(Context, CC, VT.changeVectorElementType(MVT::f16)); + if (VT == MVT::bf16) + return MVT::f16; + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } @@ -421,40 +424,6 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } -bool X86TargetLowering::splitValueIntoRegisterParts( - SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, - unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { - bool IsABIRegCopy = CC.has_value(); - EVT ValueVT = Val.getValueType(); - if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { - unsigned ValueBits = ValueVT.getSizeInBits(); - unsigned PartBits = PartVT.getSizeInBits(); - Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val); - Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val); - Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); - Parts[0] = Val; - return true; - } - return false; -} - -SDValue X86TargetLowering::joinRegisterPartsIntoValue( - SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { - bool IsABIRegCopy = CC.has_value(); - if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { - unsigned ValueBits = ValueVT.getSizeInBits(); - unsigned PartBits = PartVT.getSizeInBits(); - SDValue Val = Parts[0]; - - Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val); - Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val); - Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - return Val; - } - return SDValue(); -} - bool X86TargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c3a673f97d34..fe7d90fbcdf7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -448,7 +448,7 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128, X86VectorVTInfo< 2, EltVT64, VR128X>, X86VectorVTInfo< 4, EltVT64, VR256X>, null_frag, vinsert128_insert, sched>, - VEX_W1X, EVEX_V256; + EVEX_V256, REX_W; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasDQI] in { @@ -750,7 +750,7 @@ multiclass vextract_for_type<ValueType EltVT32, int Opcode128, X86VectorVTInfo< 4, EltVT64, VR256X>, X86VectorVTInfo< 2, EltVT64, VR128X>, null_frag, vextract128_extract, SchedRR, SchedMR>, - VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; + EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; // Even with DQI we'd like to only use these instructions for masking. 
let Predicates = [HasDQI] in { @@ -1161,7 +1161,7 @@ multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr, defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss", avx512vl_f32_info>; defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", - avx512vl_f64_info>, VEX_W1X; + avx512vl_f64_info>, REX_W; multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, @@ -1267,7 +1267,7 @@ defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", avx512vl_i32_info, HasAVX512, 1>; defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", - avx512vl_i64_info, HasAVX512, 1>, VEX_W1X; + avx512vl_i64_info, HasAVX512, 1>, REX_W; multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -1460,11 +1460,11 @@ let Predicates = [HasBF16, HasVLX] in let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", - X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, - EVEX_V256, EVEX_CD8<64, CD8VT2>; + X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, + EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", - X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X, - EVEX_V256, EVEX_CD8<64, CD8VT2>; + X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, + EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W; // Patterns for selects of bitcasted operations. def : Pat<(vselect_mask VK4WM:$mask, @@ -3185,15 +3185,13 @@ defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, - X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, - bit NoRMPattern = 0, + X86SchedWriteMoveLS Sched, bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let hasSideEffects = 0 in { let isMoveReg = 1 in def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], - _.ExeDomain>, EVEX, Sched<[Sched.RR]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; + _.ExeDomain>, EVEX, Sched<[Sched.RR]>; def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src), !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", @@ -3209,8 +3207,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, !if(NoRMPattern, [], [(set _.RC:$dst, (_.VT (ld_frag addr:$src)))]), - _.ExeDomain>, EVEX, Sched<[Sched.RM]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; + _.ExeDomain>, EVEX, Sched<[Sched.RM]>; let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), @@ -3253,53 +3250,48 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoRMPattern = 0> { + bit NoRMPattern = 0> { let Predicates = [prd] in defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.AlignedLdFrag, masked_load_aligned, - Sched.ZMM, "", NoRMPattern>, EVEX_V512; + Sched.ZMM, NoRMPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.AlignedLdFrag, masked_load_aligned, - Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; + Sched.YMM, 
NoRMPattern>, EVEX_V256; defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.AlignedLdFrag, masked_load_aligned, - Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; + Sched.XMM, NoRMPattern>, EVEX_V128; } } multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoRMPattern = 0, + bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let Predicates = [prd] in defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, - masked_load, Sched.ZMM, "", - NoRMPattern, SelectOprr>, EVEX_V512; + masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, - masked_load, Sched.YMM, EVEX2VEXOvrd#"Y", - NoRMPattern, SelectOprr>, EVEX_V256; + masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256; defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, - masked_load, Sched.XMM, EVEX2VEXOvrd, - NoRMPattern, SelectOprr>, EVEX_V128; + masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128; } } multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, - X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, - bit NoMRPattern = 0> { + X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> { let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { let isMoveReg = 1 in def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr # "\t{$src, $dst|$dst, $src}", [], _.ExeDomain>, EVEX, - Sched<[Sched.RR]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; + Sched<[Sched.RR]>; def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src), OpcodeStr # "\t{$src, ${dst} {${mask}}|"# @@ -3319,8 +3311,7 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), !if(NoMRPattern, [], [(st_frag (_.VT _.RC:$src), addr:$dst)]), - _.ExeDomain>, EVEX, Sched<[Sched.MR]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; + _.ExeDomain>, EVEX, Sched<[Sched.MR]>; def mrk : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", @@ -3344,102 +3335,92 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoMRPattern = 0> { + bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, - masked_store, Sched.ZMM, "", - NoMRPattern>, EVEX_V512; + masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, - masked_store, Sched.YMM, - EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; + masked_store, Sched.YMM, NoMRPattern>, EVEX_V256; defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, - masked_store, Sched.XMM, EVEX2VEXOvrd, - NoMRPattern>, EVEX_V128; + masked_store, Sched.XMM, NoMRPattern>, EVEX_V128; } } multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, X86SchedWriteMoveLSWidths Sched, - string EVEX2VEXOvrd, bit NoMRPattern = 0> { + bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, 
alignedstore, - masked_store_aligned, Sched.ZMM, "", - NoMRPattern>, EVEX_V512; + masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, - masked_store_aligned, Sched.YMM, - EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; + masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256; defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, - masked_store_aligned, Sched.XMM, EVEX2VEXOvrd, - NoMRPattern>, EVEX_V128; + masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, + HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, + HasAVX512, SchedWriteFMoveLS>, TB, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, + HasAVX512, SchedWriteFMoveLS>, avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, - HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, + HasAVX512, SchedWriteFMoveLS>, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, + SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPS">, + SchedWriteFMoveLS>, TB, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, + SchedWriteFMoveLS, 0, null_frag>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, - SchedWriteFMoveLS, "VMOVUPD">, + SchedWriteFMoveLS>, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA", 1>, + HasAVX512, SchedWriteVecMoveLS, 1>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA", 1>, + HasAVX512, SchedWriteVecMoveLS, 1>, TB, PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA">, + HasAVX512, SchedWriteVecMoveLS>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512, SchedWriteVecMoveLS, - "VMOVDQA">, + HasAVX512, SchedWriteVecMoveLS>, TB, PD, REX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, TB, XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, TB, XD, REX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, - SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, + SchedWriteVecMoveLS, 1, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, - SchedWriteVecMoveLS, "VMOVDQU", 1>, + SchedWriteVecMoveLS, 1>, TB, XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, - 
SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, + SchedWriteVecMoveLS, 0, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, - SchedWriteVecMoveLS, "VMOVDQU">, + SchedWriteVecMoveLS>, TB, XS, REX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need @@ -4844,8 +4825,7 @@ defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, SchedWriteVecIMul, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SchedWriteVecIMul, HasDQI, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecIMul, HasDQI, 1>, T8; defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, @@ -4989,8 +4969,7 @@ defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SchedWriteVecALU, HasBWI, 1>; @@ -4999,8 +4978,7 @@ defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SchedWriteVecALU, HasBWI, 1>, T8; @@ -5009,8 +4987,7 @@ defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SchedWriteVecALU, HasBWI, 1>; @@ -5019,8 +4996,7 @@ defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, SchedWriteVecALU, HasAVX512, 1>, T8; defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, - SchedWriteVecALU, HasAVX512, 1>, T8, - NotEVEX2VEXConvertible; + SchedWriteVecALU, HasAVX512, 1>, T8; // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 
let Predicates = [HasDQI, NoVLX] in { @@ -5405,8 +5381,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo } multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, - X86FoldableSchedWrite sched, bit IsCommutable, - string EVEX2VexOvrd> { + X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, @@ -5427,8 +5402,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]>, - EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { + Sched<[sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5436,8 +5410,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<EVEX2VexOvrd#"rm">; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Uses = [MXCSR] in @@ -5474,19 +5447,15 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, SDNode SaeNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, - VecNode, SaeNode, sched.PS.Scl, IsCommutable, - NAME#"SS">, + VecNode, SaeNode, sched.PS.Scl, IsCommutable>, TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, - VecNode, SaeNode, sched.PD.Scl, IsCommutable, - NAME#"SD">, + VecNode, SaeNode, sched.PD.Scl, IsCommutable>, TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>; let Predicates = [HasFP16] in { defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode, - VecNode, SaeNode, sched.PH.Scl, IsCommutable, - NAME#"SH">, - T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, - NotEVEX2VEXConvertible; + VecNode, SaeNode, sched.PH.Scl, IsCommutable>, + T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>; } } defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, @@ -5506,14 +5475,13 @@ defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, // X86fminc and X86fmaxc instead of X86fmin and X86fmax multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, SDNode OpNode, - X86FoldableSchedWrite sched, - string EVEX2VEXOvrd> { + X86FoldableSchedWrite sched> { let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> { + Sched<[sched]> { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -5521,36 +5489,34 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } defm VMINCSSZ : 
avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSS">, TB, XS, + SchedWriteFCmp.Scl>, TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSD">, TB, XD, + SchedWriteFCmp.Scl>, TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, SIMD_EXC; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSS">, TB, XS, + SchedWriteFCmp.Scl>, TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSD">, TB, XD, + SchedWriteFCmp.Scl>, TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, SIMD_EXC; defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc, - SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5, XS, - EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, - NotEVEX2VEXConvertible; + SchedWriteFCmp.Scl>, T_MAP5, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC; + defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc, - SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5, XS, - EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, - NotEVEX2VEXConvertible; + SchedWriteFCmp.Scl>, T_MAP5, XS, + EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC; multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, @@ -5820,8 +5786,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", - SchedWriteFAdd>, NotEVEX2VEXConvertible; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions @@ -5985,11 +5950,9 @@ multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, string OpcodeStr, SDNode OpNode, - X86SchedWriteWidths sched, - bit NotEVEX2VEXConvertibleQ = 0> { + X86SchedWriteWidths sched> { defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, avx512vl_i32_info, HasAVX512>; - let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, avx512vl_i64_info, HasAVX512>, REX_W; defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, @@ -6034,11 +5997,9 @@ multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - X86SchedWriteWidths sched, - bit NotEVEX2VEXConvertibleQ = 0> { + X86SchedWriteWidths sched> { defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; - let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; } @@ -6054,7 +6015,7 @@ defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, - SchedWriteVecShiftImm, 1>, + SchedWriteVecShiftImm>, 
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; @@ -6066,7 +6027,7 @@ defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SchedWriteVecShift>; defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, - SchedWriteVecShift, 1>; + SchedWriteVecShift>; defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SchedWriteVecShift>; @@ -6435,7 +6396,7 @@ defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, avx512vl_i32_info>; let ExeDomain = SSEPackedDouble in defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, - avx512vl_i64_info>, VEX_W1X; + avx512vl_i64_info>, REX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW @@ -8443,9 +8404,9 @@ multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, - MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible; + MaskOpNode, sched.XMM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, - MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible; + MaskOpNode, sched.YMM>, EVEX_V256; } } @@ -8524,11 +8485,10 @@ multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperato defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag, null_frag, sched.XMM, _src.info128.BroadcastStr, "{x}", i128mem, _src.info128.KRCWM>, - EVEX_V128, NotEVEX2VEXConvertible; + EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode, MaskOpNode, sched.YMM, _src.info256.BroadcastStr, - "{y}">, EVEX_V256, - NotEVEX2VEXConvertible; + "{y}">, EVEX_V256; // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction // patterns have been disabled with null_frag. 
@@ -10882,8 +10842,7 @@ defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info, multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, - X86VectorVTInfo CastInfo, - string EVEX2VEXOvrd> { + X86VectorVTInfo CastInfo> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), @@ -10891,7 +10850,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (_.VT (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))))>, - Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">; + Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", @@ -10900,8 +10859,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (CastInfo.VT (X86Shuf128 _.RC:$src1, (CastInfo.LdFrag addr:$src2), (i8 timm:$src3)))))>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", @@ -10918,45 +10876,40 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, - AVX512VLVectorVTInfo CastInfo, bits<8> opc, - string EVEX2VEXOvrd>{ + AVX512VLVectorVTInfo CastInfo, bits<8> opc>{ let Predicates = [HasAVX512] in defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, - _.info512, CastInfo.info512, "">, EVEX_V512; + _.info512, CastInfo.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, - _.info256, CastInfo.info256, - EVEX2VEXOvrd>, EVEX_V256; + _.info256, CastInfo.info256>, EVEX_V256; } defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, - avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; + avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, - avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; + avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, - avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; + avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, - avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; + avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; multiclass avx512_valign<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ - // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the - // instantiation of this class. 
let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, - Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">; + Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86VAlign _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)), (i8 timm:$src3)))>, - Sched<[sched.Folded, sched.ReadAfterFold]>, - EVEX2VEXOverride<"VPALIGNRrmi">; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), @@ -10979,7 +10932,6 @@ multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, AVX512AIi8Base, EVEX, VVVV, EVEX_V128; // We can't really override the 256-bit version so change it back to unset. - let EVEX2VEXOverride = ? in defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, AVX512AIi8Base, EVEX, VVVV, EVEX_V256; } @@ -11111,7 +11063,7 @@ let Predicates = [HasVLX, HasBWI] in { defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, - EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible; + EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { @@ -13088,12 +13040,10 @@ multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo let Predicates = [HasFP16, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info, null_frag, null_frag, sched.XMM, "{1to2}", "{x}", - i128mem, VK2WM>, - EVEX_V128, NotEVEX2VEXConvertible; + i128mem, VK2WM>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info, null_frag, null_frag, sched.YMM, "{1to4}", "{y}", - i256mem, VK4WM>, - EVEX_V256, NotEVEX2VEXConvertible; + i256mem, VK4WM>, EVEX_V256; } def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 6b0c1b8c28c9..5cfa95e085e3 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -71,24 +71,60 @@ multiclass Mul<bits<8> o, string m, Format RegMRM, Format MemMRM, SDPatternOpera // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
- let Defs = [AL,EFLAGS,AX], Uses = [AL] in - def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, - [(set AL, (node AL, GR8:$src1)), (implicit EFLAGS)]>; - let Defs = [AX,DX,EFLAGS], Uses = [AX] in - def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in - def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in - def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>; - let Defs = [AL,EFLAGS,AX], Uses = [AL] in - def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, - [(set AL, (node AL, (loadi8 addr:$src1))), (implicit EFLAGS)]>; - let Defs = [AX,DX,EFLAGS], Uses = [AX] in - def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in - def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in - def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>; + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, GR8:$src1)), (implicit EFLAGS)]>; + let Defs = [AX, DX, EFLAGS], Uses = [AX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>; + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, (loadi8 addr:$src1))), (implicit EFLAGS)]>; + let Defs = [AX, DX, EFLAGS], Uses = [AX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>; + + let Predicates = [In64BitMode] in { + let Defs = [AL, AX], Uses = [AL] in + def 8r_NF : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, []>, NF; + let Defs = [AX, DX], Uses = [AX] in + def 16r_NF : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX] in + def 32r_NF : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX] in + def 64r_NF : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>, NF; + let Defs = [AL, AX], Uses = [AL] in + def 8m_NF : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, []>, NF; + let Defs = [AX, DX], Uses = [AX] in + def 16m_NF : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX] in + def 32m_NF : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX] in + def 64m_NF : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, NF; + + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8r_EVEX : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, []>, PL; + let Defs = [AX, DX, EFLAGS], Uses = [AX] in + def 16r_EVEX : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32r_EVEX : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64r_EVEX : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>, PL; + let Defs = [AL, EFLAGS, AX], Uses = [AL] in + def 8m_EVEX : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, []>, PL; + let Defs = [AX, DX, EFLAGS], 
Uses = [AX] in + def 16m_EVEX : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX] in + def 32m_EVEX : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX] in + def 64m_EVEX : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, PL; + } } defm MUL : Mul<0xF7, "mul", MRM4r, MRM4m, mul>; @@ -99,137 +135,341 @@ multiclass Div<bits<8> o, string m, Format RegMRM, Format MemMRM> { defvar sched16 = !if(!eq(m, "div"), WriteDiv16, WriteIDiv16); defvar sched32 = !if(!eq(m, "div"), WriteDiv32, WriteIDiv32); defvar sched64 = !if(!eq(m, "div"), WriteDiv64, WriteIDiv64); - let Defs = [AL,AH,EFLAGS], Uses = [AX] in - def 8r : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>; - let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in - def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in - def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in - def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>; - let Defs = [AL,AH,EFLAGS], Uses = [AX] in - def 8m : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>; - let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in - def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16; - let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in - def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32; - let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in - def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>; + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>; + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>; + + let Predicates = [In64BitMode] in { + let Defs = [AL, AH], Uses = [AX] in + def 8r_NF : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>, NF; + let Defs = [AX, DX], Uses = [AX, DX] in + def 16r_NF : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX, EDX] in + def 32r_NF : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX, RDX] in + def 64r_NF : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>, NF; + let Defs = [AL, AH], Uses = [AX] in + def 8m_NF : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>, NF; + let Defs = [AX, DX], Uses = [AX, DX] in + def 16m_NF : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, NF, PD; + let Defs = [EAX, EDX], Uses = [EAX, EDX] in + def 32m_NF : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, NF; + let Defs = [RAX, RDX], Uses = [RAX, RDX] in + def 64m_NF : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, NF; + + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8r_EVEX : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>, PL; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16r_EVEX : MulDivOpR<o, RegMRM, m, 
Xi16, sched16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32r_EVEX : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64r_EVEX : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>, PL; + let Defs = [AL, AH, EFLAGS], Uses = [AX] in + def 8m_EVEX : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>, PL; + let Defs = [AX, DX, EFLAGS], Uses = [AX, DX] in + def 16m_EVEX : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, PL, PD; + let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EDX] in + def 32m_EVEX : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, PL; + let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RDX] in + def 64m_EVEX : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, PL; + } } + let hasSideEffects = 1 in { // so that we don't speculatively execute -defm DIV: Div<0xF7, "div", MRM6r, MRM6m>; -defm IDIV: Div<0xF7, "idiv", MRM7r, MRM7m>; + defm DIV: Div<0xF7, "div", MRM6r, MRM6m>; + defm IDIV: Div<0xF7, "idiv", MRM7r, MRM7m>; } -class IMulOpRR<X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRR_RF<0xAF, "imul", t, X86smul_flag>, TB { +class IMulOpRR_R<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRR_R<0xAF, "imul", t, ndd> { let Form = MRMSrcReg; let SchedRW = [sched]; // X = IMUL Y, Z --> X = IMUL Z, Y let isCommutable = 1; } -class IMulOpRM<X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRM_RF<0xAF, "imul", t, X86smul_flag>, TB { -let Form = MRMSrcMem; -let SchedRW = [sched.Folded, sched.ReadAfterFold]; +class IMulOpRR_RF<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRR_RF<0xAF, "imul", t, X86smul_flag, ndd> { + let Form = MRMSrcReg; + let SchedRW = [sched]; + // X = IMUL Y, Z --> X = IMUL Z, Y + let isCommutable = 1; +} +class IMulOpRM_R<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRM_R<0xAF, "imul", t, ndd> { + let Form = MRMSrcMem; + let SchedRW = [sched.Folded, sched.ReadAfterFold]; +} +class IMulOpRM_RF<X86TypeInfo t, X86FoldableSchedWrite sched, bit ndd = 0> + : BinOpRM_RF<0xAF, "imul", t, X86smul_flag, ndd> { + let Form = MRMSrcMem; + let SchedRW = [sched.Folded, sched.ReadAfterFold]; +} + +let Predicates = [NoNDD] in { + def IMUL16rr : IMulOpRR_RF<Xi16, WriteIMul16Reg>, TB, OpSize16; + def IMUL32rr : IMulOpRR_RF<Xi32, WriteIMul32Reg>, TB, OpSize32; + def IMUL64rr : IMulOpRR_RF<Xi64, WriteIMul64Reg>, TB; + def IMUL16rm : IMulOpRM_RF<Xi16, WriteIMul16Reg>, TB, OpSize16; + def IMUL32rm : IMulOpRM_RF<Xi32, WriteIMul32Reg>, TB, OpSize32; + def IMUL64rm : IMulOpRM_RF<Xi64, WriteIMul64Reg>, TB; +} +let Predicates = [HasNDD, In64BitMode] in { + def IMUL16rr_ND : IMulOpRR_RF<Xi16, WriteIMul16Reg, 1>, PD; + def IMUL32rr_ND : IMulOpRR_RF<Xi32, WriteIMul32Reg, 1>; + def IMUL64rr_ND : IMulOpRR_RF<Xi64, WriteIMul64Reg, 1>; + def IMUL16rm_ND : IMulOpRM_RF<Xi16, WriteIMul16Reg, 1>, PD; + def IMUL32rm_ND : IMulOpRM_RF<Xi32, WriteIMul32Reg, 1>; + def IMUL64rm_ND : IMulOpRM_RF<Xi64, WriteIMul64Reg, 1>; } -def IMUL16rr : IMulOpRR<Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rr : IMulOpRR<Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rr : IMulOpRR<Xi64, WriteIMul64Reg>; -def IMUL16rm : IMulOpRM<Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rm : IMulOpRM<Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rm : IMulOpRM<Xi64, WriteIMul64Reg>; +let Predicates = [In64BitMode], Pattern = [(null_frag)] in { + def IMUL16rr_NF : IMulOpRR_R<Xi16, WriteIMul16Reg>, NF, PD; + def IMUL32rr_NF : IMulOpRR_R<Xi32, WriteIMul32Reg>, NF; + def IMUL64rr_NF : IMulOpRR_R<Xi64, WriteIMul64Reg>, NF; + def IMUL16rm_NF : 
IMulOpRM_R<Xi16, WriteIMul16Reg>, NF, PD; + def IMUL32rm_NF : IMulOpRM_R<Xi32, WriteIMul32Reg>, NF; + def IMUL64rm_NF : IMulOpRM_R<Xi64, WriteIMul64Reg>, NF; + + def IMUL16rr_NF_ND : IMulOpRR_R<Xi16, WriteIMul16Reg, 1>, EVEX_NF, PD; + def IMUL32rr_NF_ND : IMulOpRR_R<Xi32, WriteIMul32Reg, 1>, EVEX_NF; + def IMUL64rr_NF_ND : IMulOpRR_R<Xi64, WriteIMul64Reg, 1>, EVEX_NF; + def IMUL16rm_NF_ND : IMulOpRM_R<Xi16, WriteIMul16Reg, 1>, EVEX_NF, PD; + def IMUL32rm_NF_ND : IMulOpRM_R<Xi32, WriteIMul32Reg, 1>, EVEX_NF; + def IMUL64rm_NF_ND : IMulOpRM_R<Xi64, WriteIMul64Reg, 1>, EVEX_NF; + + def IMUL16rr_EVEX : IMulOpRR_RF<Xi16, WriteIMul16Reg>, PL, PD; + def IMUL32rr_EVEX : IMulOpRR_RF<Xi32, WriteIMul32Reg>, PL; + def IMUL64rr_EVEX : IMulOpRR_RF<Xi64, WriteIMul64Reg>, PL; + def IMUL16rm_EVEX : IMulOpRM_RF<Xi16, WriteIMul16Reg>, PL, PD; + def IMUL32rm_EVEX : IMulOpRM_RF<Xi32, WriteIMul32Reg>, PL; + def IMUL64rm_EVEX : IMulOpRM_RF<Xi64, WriteIMul64Reg>, PL; +} class IMulOpRI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> : BinOpRI8<0x6B, "imul", binop_ndd_args, t, MRMSrcReg, - (outs t.RegClass:$dst)>, DefEFLAGS { + (outs t.RegClass:$dst)> { let SchedRW = [sched]; } class IMulOpRI_R<X86TypeInfo t, X86FoldableSchedWrite sched> : BinOpRI<0x69, "imul", binop_ndd_args, t, MRMSrcReg, + (outs t.RegClass:$dst), []> { + let SchedRW = [sched]; +} +class IMulOpRI_RF<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRI<0x69, "imul", binop_ndd_args, t, MRMSrcReg, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1, t.ImmNoSuOperator:$src2))]>, DefEFLAGS { let SchedRW = [sched]; } class IMulOpMI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpMI8<"imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst)>, - DefEFLAGS { + : BinOpMI8<"imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst)> { let Opcode = 0x6B; let SchedRW = [sched.Folded]; } class IMulOpMI_R<X86TypeInfo t, X86FoldableSchedWrite sched> : BinOpMI<0x69, "imul", binop_ndd_args, t, MRMSrcMem, + (outs t.RegClass:$dst), []> { + let SchedRW = [sched.Folded]; +} +class IMulOpMI_RF<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpMI<0x69, "imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (X86smul_flag (t.LoadNode addr:$src1), t.ImmNoSuOperator:$src2))]>, DefEFLAGS { let SchedRW = [sched.Folded]; } -def IMUL16rri8 : IMulOpRI8_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rri8 : IMulOpRI8_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rri8 : IMulOpRI8_R<Xi64, WriteIMul64Imm>; -def IMUL16rri : IMulOpRI_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rri : IMulOpRI_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rri32 : IMulOpRI_R<Xi64, WriteIMul64Imm>; - -def IMUL16rmi8 : IMulOpMI8_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rmi8 : IMulOpMI8_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rmi8 : IMulOpMI8_R<Xi64, WriteIMul64Imm>; -def IMUL16rmi : IMulOpMI_R<Xi16, WriteIMul16Imm>, OpSize16; -def IMUL32rmi : IMulOpMI_R<Xi32, WriteIMul32Imm>, OpSize32; -def IMUL64rmi32 : IMulOpMI_R<Xi64, WriteIMul64Imm>; - +def IMUL16rri8 : IMulOpRI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, OpSize16; +def IMUL32rri8 : IMulOpRI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, OpSize32; +def IMUL64rri8 : IMulOpRI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS; +def IMUL16rri : IMulOpRI_RF<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rri : IMulOpRI_RF<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rri32 : IMulOpRI_RF<Xi64, WriteIMul64Imm>; +def IMUL16rmi8 : IMulOpMI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, OpSize16; +def 
IMUL32rmi8 : IMulOpMI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, OpSize32; +def IMUL64rmi8 : IMulOpMI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS; +def IMUL16rmi : IMulOpMI_RF<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rmi : IMulOpMI_RF<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rmi32 : IMulOpMI_RF<Xi64, WriteIMul64Imm>; + +let Predicates = [In64BitMode] in { + def IMUL16rri8_NF : IMulOpRI8_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rri8_NF : IMulOpRI8_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rri8_NF : IMulOpRI8_R<Xi64, WriteIMul64Imm>, NF; + def IMUL16rri_NF : IMulOpRI_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rri_NF : IMulOpRI_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rri32_NF : IMulOpRI_R<Xi64, WriteIMul64Imm>, NF; + def IMUL16rmi8_NF : IMulOpMI8_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rmi8_NF : IMulOpMI8_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rmi8_NF : IMulOpMI8_R<Xi64, WriteIMul64Imm>, NF; + def IMUL16rmi_NF : IMulOpMI_R<Xi16, WriteIMul16Imm>, NF, PD; + def IMUL32rmi_NF : IMulOpMI_R<Xi32, WriteIMul32Imm>, NF; + def IMUL64rmi32_NF : IMulOpMI_R<Xi64, WriteIMul64Imm>, NF; + + def IMUL16rri8_EVEX : IMulOpRI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, PL, PD; + def IMUL32rri8_EVEX : IMulOpRI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, PL; + def IMUL64rri8_EVEX : IMulOpRI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS, PL; + def IMUL16rri_EVEX : IMulOpRI_RF<Xi16, WriteIMul16Imm>, PL, PD; + def IMUL32rri_EVEX : IMulOpRI_RF<Xi32, WriteIMul32Imm>, PL; + def IMUL64rri32_EVEX : IMulOpRI_RF<Xi64, WriteIMul64Imm>, PL; + def IMUL16rmi8_EVEX : IMulOpMI8_R<Xi16, WriteIMul16Imm>, DefEFLAGS, PL, PD; + def IMUL32rmi8_EVEX : IMulOpMI8_R<Xi32, WriteIMul32Imm>, DefEFLAGS, PL; + def IMUL64rmi8_EVEX : IMulOpMI8_R<Xi64, WriteIMul64Imm>, DefEFLAGS, PL; + def IMUL16rmi_EVEX : IMulOpMI_RF<Xi16, WriteIMul16Imm>, PL, PD; + def IMUL32rmi_EVEX : IMulOpMI_RF<Xi32, WriteIMul32Imm>, PL; + def IMUL64rmi32_EVEX : IMulOpMI_RF<Xi64, WriteIMul64Imm>, PL; +} //===----------------------------------------------------------------------===// // INC and DEC Instructions // -class IncOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag> { +class IncOpR_RF<X86TypeInfo t, bit ndd = 0> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag, ndd> { let Pattern = [(set t.RegClass:$dst, EFLAGS, (X86add_flag_nocf t.RegClass:$src1, 1))]; } -class DecOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag> { +class DecOpR_RF<X86TypeInfo t, bit ndd = 0> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag, ndd> { let Pattern = [(set t.RegClass:$dst, EFLAGS, (X86sub_flag_nocf t.RegClass:$src1, 1))]; } -class IncOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> { +class IncOpR_R<X86TypeInfo t, bit ndd = 0> : UnaryOpR_R<0xFF, MRM0r, "inc", t, null_frag, ndd>; +class DecOpR_R<X86TypeInfo t, bit ndd = 0> : UnaryOpR_R<0xFF, MRM1r, "dec", t, null_frag, ndd>; +class IncOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> { let Pattern = [(store (add (t.LoadNode addr:$src1), 1), addr:$src1), (implicit EFLAGS)]; } -class DecOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> { +class DecOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> { let Pattern = [(store (add (t.LoadNode addr:$src1), -1), addr:$src1), (implicit EFLAGS)]; } +class IncOpM_RF<X86TypeInfo t> : UnaryOpM_RF<0xFF, MRM0m, "inc", t, null_frag> { + let Pattern = [(set t.RegClass:$dst, EFLAGS, (add (t.LoadNode addr:$src1), 1))]; +} +class DecOpM_RF<X86TypeInfo t> : UnaryOpM_RF<0xFF, MRM1m, "dec", t, null_frag> { + let Pattern = 
[(set t.RegClass:$dst, EFLAGS, (add (t.LoadNode addr:$src1), -1))]; +} +class IncOpM_M<X86TypeInfo t> : UnaryOpM_M<0xFF, MRM0m, "inc", t, null_frag>; +class DecOpM_M<X86TypeInfo t> : UnaryOpM_M<0xFF, MRM1m, "dec", t, null_frag>; +class IncOpM_R<X86TypeInfo t> : UnaryOpM_R<0xFF, MRM0m, "inc", t, null_frag>; +class DecOpM_R<X86TypeInfo t> : UnaryOpM_R<0xFF, MRM1m, "dec", t, null_frag>; + // IncDec_Alt - Instructions like "inc reg" short forms. // Short forms only valid in 32-bit mode. Selected during MCInst lowering. class IncDec_Alt<bits<8> o, string m, X86TypeInfo t> : UnaryOpR_RF<o, AddRegFrm, m, t, null_frag>, Requires<[Not64BitMode]>; let isConvertibleToThreeAddress = 1 in { -def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16; -def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32; -def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16; -def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32; -def INC8r : IncOpR_RF<Xi8>; -def INC16r : IncOpR_RF<Xi16>, OpSize16; -def INC32r : IncOpR_RF<Xi32>, OpSize32; -def INC64r : IncOpR_RF<Xi64>; -def DEC8r : DecOpR_RF<Xi8>; -def DEC16r : DecOpR_RF<Xi16>, OpSize16; -def DEC32r : DecOpR_RF<Xi32>, OpSize32; -def DEC64r : DecOpR_RF<Xi64>; + def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16; + def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32; + def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16; + def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32; + let Predicates = [NoNDD] in { + def INC8r : IncOpR_RF<Xi8>; + def INC16r : IncOpR_RF<Xi16>, OpSize16; + def INC32r : IncOpR_RF<Xi32>, OpSize32; + def INC64r : IncOpR_RF<Xi64>; + def DEC8r : DecOpR_RF<Xi8>; + def DEC16r : DecOpR_RF<Xi16>, OpSize16; + def DEC32r : DecOpR_RF<Xi32>, OpSize32; + def DEC64r : DecOpR_RF<Xi64>; + } + let Predicates = [HasNDD, In64BitMode] in { + def INC8r_ND : IncOpR_RF<Xi8, 1>; + def INC16r_ND : IncOpR_RF<Xi16, 1>, PD; + def INC32r_ND : IncOpR_RF<Xi32, 1>; + def INC64r_ND : IncOpR_RF<Xi64, 1>; + def DEC8r_ND : DecOpR_RF<Xi8, 1>; + def DEC16r_ND : DecOpR_RF<Xi16, 1>, PD; + def DEC32r_ND : DecOpR_RF<Xi32, 1>; + def DEC64r_ND : DecOpR_RF<Xi64, 1>; + } + let Predicates = [In64BitMode], Pattern = [(null_frag)] in { + def INC8r_NF : IncOpR_R<Xi8>, NF; + def INC16r_NF : IncOpR_R<Xi16>, NF, PD; + def INC32r_NF : IncOpR_R<Xi32>, NF; + def INC64r_NF : IncOpR_R<Xi64>, NF; + def DEC8r_NF : DecOpR_R<Xi8>, NF; + def DEC16r_NF : DecOpR_R<Xi16>, NF, PD; + def DEC32r_NF : DecOpR_R<Xi32>, NF; + def DEC64r_NF : DecOpR_R<Xi64>, NF; + def INC8r_NF_ND : IncOpR_R<Xi8, 1>, NF; + def INC16r_NF_ND : IncOpR_R<Xi16, 1>, NF, PD; + def INC32r_NF_ND : IncOpR_R<Xi32, 1>, NF; + def INC64r_NF_ND : IncOpR_R<Xi64, 1>, NF; + def DEC8r_NF_ND : DecOpR_R<Xi8, 1>, NF; + def DEC16r_NF_ND : DecOpR_R<Xi16, 1>, NF, PD; + def DEC32r_NF_ND : DecOpR_R<Xi32, 1>, NF; + def DEC64r_NF_ND : DecOpR_R<Xi64, 1>, NF; + def INC8r_EVEX : IncOpR_RF<Xi8>, PL; + def INC16r_EVEX : IncOpR_RF<Xi16>, PL, PD; + def INC32r_EVEX : IncOpR_RF<Xi32>, PL; + def INC64r_EVEX : IncOpR_RF<Xi64>, PL; + def DEC8r_EVEX : DecOpR_RF<Xi8>, PL; + def DEC16r_EVEX : DecOpR_RF<Xi16>, PL, PD; + def DEC32r_EVEX : DecOpR_RF<Xi32>, PL; + def DEC64r_EVEX : DecOpR_RF<Xi64>, PL; + } } let Predicates = [UseIncDec] in { -def INC8m : IncOpM_M<Xi8>; -def INC16m : IncOpM_M<Xi16>, OpSize16; -def INC32m : IncOpM_M<Xi32>, OpSize32; -def DEC8m : DecOpM_M<Xi8>; -def DEC16m : DecOpM_M<Xi16>, OpSize16; -def DEC32m : DecOpM_M<Xi32>, OpSize32; + def INC8m : IncOpM_MF<Xi8>; + def INC16m : IncOpM_MF<Xi16>, OpSize16; + def INC32m : IncOpM_MF<Xi32>, 
OpSize32; + def DEC8m : DecOpM_MF<Xi8>; + def DEC16m : DecOpM_MF<Xi16>, OpSize16; + def DEC32m : DecOpM_MF<Xi32>, OpSize32; } let Predicates = [UseIncDec, In64BitMode] in { -def INC64m : IncOpM_M<Xi64>; -def DEC64m : DecOpM_M<Xi64>; + def INC64m : IncOpM_MF<Xi64>; + def DEC64m : DecOpM_MF<Xi64>; +} +let Predicates = [HasNDD, In64BitMode, UseIncDec] in { + def INC8m_ND : IncOpM_RF<Xi8>; + def INC16m_ND : IncOpM_RF<Xi16>, PD; + def INC32m_ND : IncOpM_RF<Xi32>; + def DEC8m_ND : DecOpM_RF<Xi8>; + def DEC16m_ND : DecOpM_RF<Xi16>, PD; + def DEC32m_ND : DecOpM_RF<Xi32>; + def INC64m_ND : IncOpM_RF<Xi64>; + def DEC64m_ND : DecOpM_RF<Xi64>; +} +let Predicates = [In64BitMode], Pattern = [(null_frag)] in { + def INC8m_NF : IncOpM_M<Xi8>, NF; + def INC16m_NF : IncOpM_M<Xi16>, NF, PD; + def INC32m_NF : IncOpM_M<Xi32>, NF; + def INC64m_NF : IncOpM_M<Xi64>, NF; + def DEC8m_NF : DecOpM_M<Xi8>, NF; + def DEC16m_NF : DecOpM_M<Xi16>, NF, PD; + def DEC32m_NF : DecOpM_M<Xi32>, NF; + def DEC64m_NF : DecOpM_M<Xi64>, NF; + def INC8m_NF_ND : IncOpM_R<Xi8>, NF; + def INC16m_NF_ND : IncOpM_R<Xi16>, NF, PD; + def INC32m_NF_ND : IncOpM_R<Xi32>, NF; + def INC64m_NF_ND : IncOpM_R<Xi64>, NF; + def DEC8m_NF_ND : DecOpM_R<Xi8>, NF; + def DEC16m_NF_ND : DecOpM_R<Xi16>, NF, PD; + def DEC32m_NF_ND : DecOpM_R<Xi32>, NF; + def DEC64m_NF_ND : DecOpM_R<Xi64>, NF; + def INC8m_EVEX : IncOpM_MF<Xi8>, PL; + def INC16m_EVEX : IncOpM_MF<Xi16>, PL, PD; + def INC32m_EVEX : IncOpM_MF<Xi32>, PL; + def INC64m_EVEX : IncOpM_MF<Xi64>, PL; + def DEC8m_EVEX : DecOpM_MF<Xi8>, PL; + def DEC16m_EVEX : DecOpM_MF<Xi16>, PL, PD; + def DEC32m_EVEX : DecOpM_MF<Xi32>, PL; + def DEC64m_EVEX : DecOpM_MF<Xi64>, PL; } //===----------------------------------------------------------------------===// @@ -350,212 +590,212 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + def 8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def 16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; + def 16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; + def 64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; + def 8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND : BinOpRR_R<BaseOpc, 
mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; - def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; + def 16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; + def 64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; + def 8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } } - def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; - def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + 
def 8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; + def 16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; + def 64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; + def 8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + def 8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def 16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; + def 32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; + def 64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; - def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; - def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; - def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; - def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; - def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; - def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; - def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + def 8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; + def 16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; + def 32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; + def 64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; + def 8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def 16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def 32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def 64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; - def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; - def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; - def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; - def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; - def NAME#64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; + def 8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; + def 16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; + def 32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; + def 64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; + def 8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; + def 16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; + def 32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; + def 64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; } let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { let Predicates = 
[NoNDD] in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + def 16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; + def 8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def 16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; + def 32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; + def 64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; - def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; - def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; - def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; - def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; - def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; - def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; + def 16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; + def 32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; + def 64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; + def 16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def 8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; + def 16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def 32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def 64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; - def NAME#8ri_NF : BinOpRI_R<0x80, 
mnemonic, Xi8, RegMRM>, NF; - def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; - def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; - def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; - def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; + def 16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; + def 64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; + def 8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; + def 16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; + def 32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; + def 64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; + def 16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; + def 16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; + def 64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; } } - def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; - def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; - def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; - def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; - def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; + def 8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; + def 16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; + def 32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; + def 64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; } let Predicates = [In64BitMode] in { - def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; - def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; - def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, 
Xi32>, NF; - def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; - def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; + def 16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; + def 32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; + def 64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; + def 8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; + def 8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; - def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; - def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; + def 8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, 
MemMRM>, NF; + def 64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; } let Predicates = [In64BitMode] in { - def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; - def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; - def NAME#16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; - def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; - def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; - def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; + def 16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; + def 64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, NF; + def 8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; + def 16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def 32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; + def 64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; + def 16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; + def 8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; + def 16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; + def 64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. 
let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -571,162 +811,162 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { let Predicates = [NoNDD] in { - def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; + def 8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; - def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; + def 16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; + def 32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; + def 64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; } } } // isCommutable let Predicates = [In64BitMode] in { - def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } - def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, 
Xi64>; let Predicates = [In64BitMode] in { - def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; - def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; - def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; - def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; - def NAME#8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; - def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; - def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; - def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def 8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def 16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def 32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def 64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def 8rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def 16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def 32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def 64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; } let Predicates = [NoNDD] in { - def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def 32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; - def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; - def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; - def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; + def 8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; + def 16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; + def 32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; + def 64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; } let Predicates = [In64BitMode] in { - def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; - def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; - def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; - def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; + def 8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; + def 16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; + def 32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; + def 64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; } let Predicates = [NoNDD] in { - def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. 
- def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; } } let Predicates = [HasNDD, In64BitMode] in { - def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; + def 8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; - def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; - def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; - def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; - def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; - def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; + def 16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def 32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def 64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def 16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; + def 32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; + def 64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; } } let Predicates = [In64BitMode] in { - def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; - def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; - def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; - def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; - def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; - def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; - def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; + def 8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; + def 16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def 32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def 64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def 16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; + def 32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; + def 64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; } - def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, 
opnode>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; - def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; - def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def 32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; + def 64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; } let Predicates = [In64BitMode] in { - def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; - def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; - def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; - def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + def 8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def 16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def 32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def 64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; + def 8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; - def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; + def 16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; let Predicates = [HasNDD, In64BitMode] in { - def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; - def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; - def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; - def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; - def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; - def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; + def 32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; + def 64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; + def 16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def 32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def 64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; } let Predicates = [In64BitMode] in { - def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; - def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; - def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, 
MemMRM>, PL; - def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; - def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; - def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; - def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; + def 8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; + def 16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def 32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; + def 64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; + def 16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; + def 32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; + def 64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; - def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } - def NAME#8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAIF_AF<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -739,71 +979,71 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress> { let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; + def 8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def 16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def 32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def 64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; - def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + def 8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def 16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, 
opnode>, OpSize16; + def 32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def 64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; - def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def 8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def NAME#16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; + def 16ri8 : BinOpRI8_F<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def 32ri8 : BinOpRI8_F<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def 64ri8 : BinOpRI8_F<0x83, mnemonic, Xi64, RegMRM>; - def NAME#16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; + def 16ri : BinOpRI_F<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def 32ri : BinOpRI_F<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def 64ri32: BinOpRI_F<0x81, mnemonic, Xi64, opnode, RegMRM>; } - def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + def 8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def 16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def 32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def 64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; - def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; + def 16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>, OpSize16; + def 32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; + def 64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>; - def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; - def NAME#16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; - def NAME#32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; + def 8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def 16mi : BinOpMI_F<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; + def 32mi : BinOpMI_F<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; + def 64mi32 : BinOpMI_F<0x81, mnemonic, Xi64, opnode, MemMRM>; // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. 
let Predicates = [Not64BitMode] in { - def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; + def 8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; let mayLoad = 1 in - def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; + def 8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>; } - def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, + def 8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL, "{$src, %al|al, $src}">; - def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, + def 16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX, "{$src, %ax|ax, $src}">, OpSize16; - def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, + def 32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX, "{$src, %eax|eax, $src}">, OpSize32; - def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, + def 64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX, "{$src, %rax|rax, $src}">; } @@ -1119,14 +1359,34 @@ defm MULX64 : MulX<Xi64, WriteMULX64>, REX_W; // We don't have patterns for these as there is no advantage over ADC for // most code. let Form = MRMSrcReg in { -def ADCX32rr : BinOpRRF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; -def ADCX64rr : BinOpRRF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; -def ADOX32rr : BinOpRRF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; -def ADOX64rr : BinOpRRF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; + def ADCX32rr : BinOpRRF_RF<0xF6, "adcx", Xi32>, T8, PD; + def ADCX64rr : BinOpRRF_RF<0xF6, "adcx", Xi64>, T8, PD; + def ADOX32rr : BinOpRRF_RF<0xF6, "adox", Xi32>, T8, XS; + def ADOX64rr : BinOpRRF_RF<0xF6, "adox", Xi64>, T8, XS; + let Predicates =[In64BitMode] in { + def ADCX32rr_EVEX : BinOpRRF_RF<0x66, "adcx", Xi32>, EVEX, T_MAP4, PD; + def ADCX64rr_EVEX : BinOpRRF_RF<0x66, "adcx", Xi64>, EVEX, T_MAP4, PD; + def ADOX32rr_EVEX : BinOpRRF_RF<0x66, "adox", Xi32>, EVEX, T_MAP4, XS; + def ADOX64rr_EVEX : BinOpRRF_RF<0x66, "adox", Xi64>, EVEX, T_MAP4, XS; + def ADCX32rr_ND : BinOpRRF_RF<0x66, "adcx", Xi32, null_frag, 1>, PD; + def ADCX64rr_ND : BinOpRRF_RF<0x66, "adcx", Xi64, null_frag, 1>, PD; + def ADOX32rr_ND : BinOpRRF_RF<0x66, "adox", Xi32, null_frag, 1>, XS; + def ADOX64rr_ND : BinOpRRF_RF<0x66, "adox", Xi64, null_frag, 1>, XS; + } } let Form = MRMSrcMem in { -def ADCX32rm : BinOpRMF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; -def ADCX64rm : BinOpRMF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; -def ADOX32rm : BinOpRMF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; -def ADOX64rm : BinOpRMF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; + def ADCX32rm : BinOpRMF_RF<0xF6, "adcx", Xi32>, T8, PD; + def ADCX64rm : BinOpRMF_RF<0xF6, "adcx", Xi64>, T8, PD; + def ADOX32rm : BinOpRMF_RF<0xF6, "adox", Xi32>, T8, XS; + def ADOX64rm : BinOpRMF_RF<0xF6, "adox", Xi64>, T8, XS; + let Predicates =[In64BitMode] in { + def ADCX32rm_EVEX : BinOpRMF_RF<0x66, "adcx", Xi32>, EVEX, T_MAP4, PD; + def ADCX64rm_EVEX : BinOpRMF_RF<0x66, "adcx", Xi64>, EVEX, T_MAP4, PD; + def ADOX32rm_EVEX : BinOpRMF_RF<0x66, "adox", Xi32>, EVEX, T_MAP4, XS; + def ADOX64rm_EVEX : BinOpRMF_RF<0x66, "adox", Xi64>, EVEX, T_MAP4, XS; + def ADCX32rm_ND : BinOpRMF_RF<0x66, "adcx", Xi32, null_frag, 1>, PD; + def ADCX64rm_ND : BinOpRMF_RF<0x66, "adcx", Xi64, null_frag, 1>, PD; + def ADOX32rm_ND : BinOpRMF_RF<0x66, "adox", Xi32, null_frag, 1>, XS; + def ADOX64rm_ND : BinOpRMF_RF<0x66, "adox", Xi64, null_frag, 1>, XS; + } } diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 6e76b44b66a3..8798b13a1761 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ 
b/llvm/lib/Target/X86/X86InstrFormats.td @@ -247,8 +247,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasREPPrefix = 0; // Does this inst have a REP prefix? bits<2> OpEncBits = OpEnc.Value; bit IgnoresW = 0; // Does this inst ignore REX_W field? - bit EVEX_W1_VEX_W0 = 0; // This EVEX inst with VEX.W==1 can become a VEX - // instruction with VEX.W == 0. bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field? bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit @@ -279,10 +277,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, CD8_EltSize, !srl(VectSize, CD8_Form{1-0}))), 0); - // Used to prevent an explicit EVEX2VEX override for this instruction. - string EVEX2VEXOverride = ?; - - bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion. ExplicitOpPrefix explicitOpPrefix = NoExplicitOpPrefix; bits<2> explicitOpPrefixBits = explicitOpPrefix.Value; // TSFlags layout should be kept in sync with X86BaseInfo.h. diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index eac8d79eb8a3..eb0734f9a618 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -29,8 +29,10 @@ class X86Subtarget; namespace X86 { enum AsmComments { + // For instr that was compressed from EVEX to LEGACY. + AC_EVEX_2_LEGACY = MachineInstr::TAsmComments, // For instr that was compressed from EVEX to VEX. - AC_EVEX_2_VEX = MachineInstr::TAsmComments + AC_EVEX_2_VEX = AC_EVEX_2_LEGACY << 1 }; /// Return a pair of condition code for the given predicate and whether diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 305bd74f7bd7..97c625a64cfc 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1212,36 +1212,33 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { (implicit EFLAGS)]>, TB, XS, Sched<[WriteTZCNTLd]>; } -multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM, - RegisterClass RC, X86MemOperand x86memop, - X86FoldableSchedWrite sched, string Suffix = ""> { -let hasSideEffects = 0 in { - def rr#Suffix : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched]>; - let mayLoad = 1 in - def rm#Suffix : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>, - T8, VEX, VVVV, Sched<[sched.Folded]>; -} +multiclass Bls<string m, Format RegMRM, Format MemMRM, X86TypeInfo t, string Suffix = ""> { + let SchedRW = [WriteBLS] in { + def rr#Suffix : UnaryOpR<0xF3, RegMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; + } + + let SchedRW = [WriteBLS.Folded] in + def rm#Suffix : UnaryOpM<0xF3, MemMRM, m, unaryop_ndd_args, t, + (outs t.RegClass:$dst), []>, T8, VVVV; } -let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, REX_W; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W; +let Predicates = [HasBMI], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32>, 
VEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64>, VEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32>, VEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64>, VEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32>, VEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64>, VEX; } -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; - defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS, "_EVEX">, EVEX; - defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS, "_EVEX">, REX_W, EVEX; +let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in { + defm BLSR32 : Bls<"blsr", MRM1r, MRM1m, Xi32, "_EVEX">, EVEX; + defm BLSR64 : Bls<"blsr", MRM1r, MRM1m, Xi64, "_EVEX">, EVEX; + defm BLSMSK32 : Bls<"blsmsk", MRM2r, MRM2m, Xi32, "_EVEX">, EVEX; + defm BLSMSK64 : Bls<"blsmsk", MRM2r, MRM2m, Xi64, "_EVEX">, EVEX; + defm BLSI32 : Bls<"blsi", MRM3r, MRM3m, Xi32, "_EVEX">, EVEX; + defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX; } let Predicates = [HasBMI] in { @@ -1281,50 +1278,35 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } -multiclass bmi4VOp3_base<bits<8> opc, string mnemonic, RegisterClass RC, - X86MemOperand x86memop, SDPatternOperator OpNode, - PatFrag ld_frag, X86FoldableSchedWrite Sched, - string Suffix = ""> { - def rr#Suffix : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>, - T8, VEX, Sched<[Sched]>; -let mayLoad = 1 in - def rm#Suffix : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)), - (implicit EFLAGS)]>, T8, VEX, - Sched<[Sched.Folded, - // x86memop:$src1 - ReadDefault, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, - // RC:$src2 - Sched.ReadAfterFold]>; +multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + X86FoldableSchedWrite sched, string Suffix = ""> { + let SchedRW = [sched], Form = MRMSrcReg4VOp3 in + def rr#Suffix : BinOpRR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2))]>, T8; + let SchedRW = [sched.Folded, + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + sched.ReadAfterFold], Form = MRMSrcMem4VOp3 in + def rm#Suffix : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), + t.RegClass:$src2))]>, T8; } let Predicates = [HasBMI, NoEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR>; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR>, REX_W; + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR>, VEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR>, VEX; } let Predicates = [HasBMI2, NoEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, 
WriteBZHI>; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI>, REX_W; + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI>, VEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI>, VEX; } -let Predicates = [HasBMI, HasEGPR], Defs = [EFLAGS] in { - defm BEXTR32 : bmi4VOp3_base<0xF7, "bextr{l}", GR32, i32mem, - X86bextr, loadi32, WriteBEXTR, "_EVEX">, EVEX; - defm BEXTR64 : bmi4VOp3_base<0xF7, "bextr{q}", GR64, i64mem, - X86bextr, loadi64, WriteBEXTR, "_EVEX">, EVEX, REX_W; +let Predicates = [HasBMI, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BEXTR32 : Bmi4VOp3<0xF7, "bextr", Xi32, X86bextr, WriteBEXTR, "_EVEX">, EVEX; + defm BEXTR64 : Bmi4VOp3<0xF7, "bextr", Xi64, X86bextr, WriteBEXTR, "_EVEX">, EVEX; } -let Predicates = [HasBMI2, HasEGPR], Defs = [EFLAGS] in { - defm BZHI32 : bmi4VOp3_base<0xF5, "bzhi{l}", GR32, i32mem, - X86bzhi, loadi32, WriteBZHI, "_EVEX">, EVEX; - defm BZHI64 : bmi4VOp3_base<0xF5, "bzhi{q}", GR64, i64mem, - X86bzhi, loadi64, WriteBZHI, "_EVEX">, EVEX, REX_W; +let Predicates = [HasBMI2, HasEGPR, In64BitMode], Defs = [EFLAGS] in { + defm BZHI32 : Bmi4VOp3<0xF5, "bzhi", Xi32, X86bzhi, WriteBZHI, "_EVEX">, EVEX; + defm BZHI64 : Bmi4VOp3<0xF5, "bzhi", Xi64, X86bzhi, WriteBZHI, "_EVEX">, EVEX; } def CountTrailingOnes : SDNodeXForm<imm, [{ @@ -1371,22 +1353,22 @@ multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC, def rr#Suffix : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>, - VEX, VVVV, Sched<[WriteALU]>; + NoCD8, VVVV, Sched<[WriteALU]>; def rm#Suffix : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>, - VEX, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; + NoCD8, VVVV, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; } let Predicates = [HasBMI2, NoEGPR] in { defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem, - X86pdep, loadi32>, T8, XD; + X86pdep, loadi32>, T8, XD, VEX; defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem, - X86pdep, loadi64>, T8, XD, REX_W; + X86pdep, loadi64>, T8, XD, REX_W, VEX; defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem, - X86pext, loadi32>, T8, XS; + X86pext, loadi32>, T8, XS, VEX; defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem, - X86pext, loadi64>, T8, XS, REX_W; + X86pext, loadi64>, T8, XS, REX_W, VEX; } let Predicates = [HasBMI2, HasEGPR] in { diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 94fa6e45ded9..cb751639a057 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -8,8 +8,41 @@ def TruePredicate : Predicate<"true">; +// Intel x86 instructions have three separate encoding spaces: legacy, VEX, and +// EVEX. Not all X86 instructions are extended for EGPR. The following is an +// overview of which instructions are extended and how we implement them. +// +// * Legacy space +// All instructions in legacy maps 0 and 1 that have explicit GPR or memory +// operands can use the REX2 prefix to access the EGPR, except XSAVE*/XRSTOR. +// +// * EVEX space +// All instructions in the EVEX space can access the EGPR in their +// register/memory operands. 
+// +// For the above instructions, the only difference in encoding is reflected in +// the REX2/EVEX prefix when EGPR is used, i.e. the opcode and opcode name are +// unchanged. We don’t add new entries in TD, and instead we extend GPR with +// R16-R31 and make them allocatable only when the feature EGPR is available. +// +// Besides, some instructions in legacy space with map 2/3 and VEX space are +// promoted into EVEX space. Encoding space changes after the promotion, opcode +// and opcode map may change too sometimes. For these instructions, we add new +// entries in TD to avoid overcomplicating the assembler and disassembler. +// +// HasEGPR is for the new entries and NoEGPR is for the entries before +// promotion, so that the promoted variant can be selected first to benefit RA. def HasEGPR : Predicate<"Subtarget->hasEGPR()">; def NoEGPR : Predicate<"!Subtarget->hasEGPR()">; + +// APX extends some instructions with a new form that has an extra register +// operand called a new data destination (NDD). In such forms, NDD is the new +// destination register receiving the result of the computation and all other +// operands (including the original destination operand) become read-only source +// operands. +// +// HasNDD is for the new NDD entries and NoNDD is for the legacy 2-address +// entries, so that the NDD variant can be selected first to benefit RA. def HasNDD : Predicate<"Subtarget->hasNDD()">; def NoNDD : Predicate<"!Subtarget->hasNDD()">; def HasCMOV : Predicate<"Subtarget->canUseCMOV()">; diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index d13e3b7af69a..f951894db189 100644 --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -868,7 +868,7 @@ let Predicates = [HasBMI2, NoEGPR] in { defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, PD, REX_W; } -let Predicates = [HasBMI2, HasEGPR] in { +let Predicates = [HasBMI2, HasEGPR, In64BitMode] in { defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem, "_EVEX">, EVEX; defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem, "_EVEX">, REX_W, EVEX; defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem, "_EVEX">, T8, XS, EVEX; diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 699e5847e63f..b1be4739617d 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode, HasINVPCID]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode, HasINVPCID]>; + Requires<[In64BitMode]>; def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; } // SchedRW -let Predicates = [In64BitMode, HasINVPCID] in { +let Predicates = [HasINVPCID, NoEGPR] in { // The instruction can only use a 64 bit register as the register argument // in 64 bit mode, while the intrinsic only accepts a 32 bit argument // corresponding to it.
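[Editorial note — not part of the imported patch] The X86InstrPredicates.td comment above describes the APX NDD form in prose; the following is a minimal C++-level sketch of the operand roles only (illustrative, the exact APX assembly syntax is intentionally not shown):

#include <cstdint>

// Legacy two-address form: the destination is also a source and gets
// overwritten, so its old value must be copied elsewhere if it is still needed.
inline void addTwoAddress(uint64_t &Dst, uint64_t Src) { Dst = Dst + Src; }

// APX NDD three-address form: the result lands in a separate "new data
// destination" while both sources stay read-only, which is why the HasNDD
// entries are ordered so the register allocator can prefer them.
inline uint64_t addNDD(uint64_t Src1, uint64_t Src2) { return Src1 + Src2; }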
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in { addr:$src2)>; } +let Predicates = [HasINVPCID, HasEGPR] in { + def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2), + (INVPCID64_EVEX + (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit), + addr:$src2)>; +} + //===----------------------------------------------------------------------===// // SMAP Instruction diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td index da85922a018d..f4ae15837fbf 100644 --- a/llvm/lib/Target/X86/X86InstrUtils.td +++ b/llvm/lib/Target/X86/X86InstrUtils.td @@ -43,8 +43,6 @@ class XOP { Encoding OpEnc = EncXOP; } class VEX { Encoding OpEnc = EncVEX; } class EVEX { Encoding OpEnc = EncEVEX; } class WIG { bit IgnoresW = 1; } -// Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX. -class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } class VVVV { bit hasVEX_4V = 1; } @@ -66,9 +64,6 @@ class EVEX_CD8<int esize, CD8VForm form> { } class NoCD8 { bits<7> CD8_Scale = 0; } -class EVEX2VEXOverride<string VEXInstrName> { - string EVEX2VEXOverride = VEXInstrName; -} class AVX512BIi8Base : TB, PD { Domain ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; @@ -89,7 +84,6 @@ class AVX512PDIi8Base : TB, PD { Domain ExeDomain = SSEPackedDouble; ImmType ImmT = Imm8; } -class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; } class ExplicitREX2Prefix { ExplicitOpPrefix explicitOpPrefix = ExplicitREX2; } class ExplicitVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitVEX; } class ExplicitEVEXPrefix { ExplicitOpPrefix explicitOpPrefix = ExplicitEVEX; } @@ -1005,7 +999,7 @@ class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> } // BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write // EFLAGS. -class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> +class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0> : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2, @@ -1041,7 +1035,7 @@ class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit (t.LoadNode addr:$src2)))]>, DefEFLAGS, NDD<ndd>; // BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write // EFLAGS. -class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> +class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node = null_frag, bit ndd = 0> : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>, diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index e1a67f61e766..133ee2041565 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -2055,10 +2055,11 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { } } - // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that - // are compressed from EVEX encoding to VEX encoding. 
+ // Add a comment about EVEX compression if (TM.Options.MCOptions.ShowMCEncoding) { - if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) + if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY) + OutStreamer->AddComment("EVEX TO LEGACY Compression ", false); + else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX) OutStreamer->AddComment("EVEX TO VEX Compression ", false); } diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 5668b514d6de..b92bffbe6239 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -75,7 +75,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializeGlobalISel(PR); initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); - initializeEvexToVexInstPassPass(PR); + initializeCompressEVEXPassPass(PR); initializeFixupLEAPassPass(PR); initializeFPSPass(PR); initializeX86FixupSetCCPassPass(PR); @@ -575,7 +575,7 @@ void X86PassConfig::addPreEmitPass() { addPass(createX86FixupInstTuning()); addPass(createX86FixupVectorConstants()); } - addPass(createX86EvexToVexInsts()); + addPass(createX86CompressEVEXPass()); addPass(createX86DiscriminateMemOpsPass()); addPass(createX86InsertPrefetchPass()); addPass(createX86InsertX87waitPass()); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 49631f38017a..cd40b1d3b093 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2232,6 +2232,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, + { ISD::FP_EXTEND, MVT::v16f64, MVT::v16f32, 4 }, // 2*vcvtps2pd+vextractf64x4 { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd diff --git a/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 05003ec304ad..1535eb622da6 100644 --- a/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -142,7 +142,7 @@ void XCoreDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; case ISD::Constant: { - uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue(); + uint64_t Val = N->getAsZExtVal(); if (immMskBitp(N)) { // Transformation function: get the size of a mask // Look for the first non-zero bit diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 2e08c7b12d9d..32941c013c66 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1524,7 +1524,8 @@ StringRef sys::getHostCPUName() { // Use processor id to detect cpu name. uint32_t processor_id; __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); - switch (processor_id & 0xff00) { + // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. + switch (processor_id & 0xf000) { case 0xc000: // Loongson 64bit, 4-issue return "la464"; // TODO: Others. 
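[Editorial note — not part of the imported patch] A small self-contained sketch of why the Host.cpp change above widens the mask from 0xff00 to 0xf000: only the top nibble of the processor id identifies the series, so an id such as 0xc1xx should still be detected as la464. The sample id below is hypothetical; the constants mirror the kernel's PRID_SERIES_* scheme referenced in the comment.

#include <cstdint>
#include <cstdio>

static const char *loongArchSeriesName(uint32_t ProcessorID) {
  // PRID_SERIES_MASK keeps only the series nibble, as in
  // arch/loongarch/include/asm/cpu.h.
  switch (ProcessorID & 0xf000) {
  case 0xc000: // Loongson 64-bit, 4-issue
    return "la464";
  default:
    return "generic";
  }
}

int main() {
  // With the old 0xff00 mask this hypothetical id would not have matched the
  // 0xc000 case; with 0xf000 it does.
  std::printf("%s\n", loongArchSeriesName(0xc1a5));
  return 0;
}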
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index e93502187b54..b9971c25af71 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -1206,11 +1206,14 @@ static VersionTuple parseVersionFromName(StringRef Name) { } VersionTuple Triple::getEnvironmentVersion() const { + return parseVersionFromName(getEnvironmentVersionString()); +} + +StringRef Triple::getEnvironmentVersionString() const { StringRef EnvironmentName = getEnvironmentName(); StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment()); EnvironmentName.consume_front(EnvironmentTypeName); - - return parseVersionFromName(EnvironmentName); + return EnvironmentName; } VersionTuple Triple::getOSVersion() const { diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 529f7309a1a2..89a1ad2243c8 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2953,6 +2953,9 @@ void coro::salvageDebugInfo( std::optional<BasicBlock::iterator> InsertPt; if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. if (!OptimizeFrame && I->getDebugLoc()) DVI.setDebugLoc(I->getDebugLoc()); } else if (isa<Argument>(Storage)) @@ -2988,9 +2991,14 @@ void coro::salvageDebugInfo( // dbg.declare does. if (DPV.getType() == DPValue::LocationType::Declare) { std::optional<BasicBlock::iterator> InsertPt; - if (auto *I = dyn_cast<Instruction>(Storage)) + if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); - else if (isa<Argument>(Storage)) + // Update DILocation only in O0 since it is easy to get out of sync in + // optimizations. See https://github.com/llvm/llvm-project/pull/75104 for + // an example. + if (!OptimizeFrame && I->getDebugLoc()) + DPV.setDebugLoc(I->getDebugLoc()); + } else if (isa<Argument>(Storage)) InsertPt = F->getEntryBlock().begin(); if (InsertPt) { DPV.removeFromParent(); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b2618e35b085..cc5a4ee8c2bd 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -6725,10 +6725,10 @@ struct AAHeapToStackFunction final : public AAHeapToStack { LLVMContext &Ctx = AI.CB->getContext(); ObjectSizeOpts Opts; ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, Opts); - SizeOffsetEvalType SizeOffsetPair = Eval.compute(AI.CB); + SizeOffsetValue SizeOffsetPair = Eval.compute(AI.CB); assert(SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown() && - cast<ConstantInt>(SizeOffsetPair.second)->isZero()); - Size = SizeOffsetPair.first; + cast<ConstantInt>(SizeOffsetPair.Offset)->isZero()); + Size = SizeOffsetPair.Size; } Instruction *IP = diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 556fde37efeb..96b612254ca5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1666,13 +1666,6 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { if (Instruction *Ashr = foldAddToAshr(I)) return Ashr; - // min(A, B) + max(A, B) => A + B. 
- if (match(&I, m_CombineOr(m_c_Add(m_SMax(m_Value(A), m_Value(B)), - m_c_SMin(m_Deferred(A), m_Deferred(B))), - m_c_Add(m_UMax(m_Value(A), m_Value(B)), - m_c_UMin(m_Deferred(A), m_Deferred(B)))))) - return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, A, B, &I); - // (~X) + (~Y) --> -2 - (X + Y) { // To ensure we can save instructions we need to ensure that we consume both diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 43d4496571be..40b48699f758 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -172,10 +172,10 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { // If the memcpy has metadata describing the members, see if we can get the // TBAA tag describing our copy. - MDNode *CopyMD = nullptr; - if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) { - CopyMD = M; - } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { + AAMDNodes AACopyMD = MI->getAAMetadata(); + + if (MDNode *M = AACopyMD.TBAAStruct) { + AACopyMD.TBAAStruct = nullptr; if (M->getNumOperands() == 3 && M->getOperand(0) && mdconst::hasa<ConstantInt>(M->getOperand(0)) && mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() && @@ -184,7 +184,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == Size && M->getOperand(2) && isa<MDNode>(M->getOperand(2))) - CopyMD = cast<MDNode>(M->getOperand(2)); + AACopyMD.TBAA = cast<MDNode>(M->getOperand(2)); } Value *Src = MI->getArgOperand(1); @@ -192,8 +192,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { LoadInst *L = Builder.CreateLoad(IntType, Src); // Alignment from the mem intrinsic will be better, so use it. L->setAlignment(*CopySrcAlign); - if (CopyMD) - L->setMetadata(LLVMContext::MD_tbaa, CopyMD); + L->setAAMetadata(AACopyMD); MDNode *LoopMemParallelMD = MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access); if (LoopMemParallelMD) @@ -205,8 +204,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { StoreInst *S = Builder.CreateStore(L, Dest); // Alignment from the mem intrinsic will be better, so use it. 
S->setAlignment(*CopyDstAlign); - if (CopyMD) - S->setMetadata(LLVMContext::MD_tbaa, CopyMD); + S->setAAMetadata(AACopyMD); if (LoopMemParallelMD) S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); if (AccessGroupMD) @@ -1536,11 +1534,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } if (II->isCommutative()) { - if (Instruction *I = foldCommutativeIntrinsicOverSelects(*II)) - return I; - - if (Instruction *I = foldCommutativeIntrinsicOverPhis(*II)) - return I; + if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) { + replaceOperand(*II, 0, Pair->first); + replaceOperand(*II, 1, Pair->second); + return II; + } if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI)) return NewCall; @@ -4246,39 +4244,3 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call, Call.setCalledFunction(FTy, NestF); return &Call; } - -// op(select(%v, %x, %y), select(%v, %y, %x)) --> op(%x, %y) -Instruction * -InstCombinerImpl::foldCommutativeIntrinsicOverSelects(IntrinsicInst &II) { - assert(II.isCommutative()); - - Value *A, *B, *C; - if (match(II.getOperand(0), m_Select(m_Value(A), m_Value(B), m_Value(C))) && - match(II.getOperand(1), - m_Select(m_Specific(A), m_Specific(C), m_Specific(B)))) { - replaceOperand(II, 0, B); - replaceOperand(II, 1, C); - return &II; - } - - return nullptr; -} - -Instruction * -InstCombinerImpl::foldCommutativeIntrinsicOverPhis(IntrinsicInst &II) { - assert(II.isCommutative() && "Instruction should be commutative"); - - PHINode *LHS = dyn_cast<PHINode>(II.getOperand(0)); - PHINode *RHS = dyn_cast<PHINode>(II.getOperand(1)); - - if (!LHS || !RHS) - return nullptr; - - if (auto P = matchSymmetricPhiNodesPair(LHS, RHS)) { - replaceOperand(II, 0, P->first); - replaceOperand(II, 1, P->second); - return &II; - } - - return nullptr; -} diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 3875e59c3ede..7c1aff445524 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4920,8 +4920,9 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, } } - if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() && - BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) { + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && + (BO0->hasOneUse() || BO1->hasOneUse()) && + BO0->getOperand(1) == BO1->getOperand(1)) { switch (BO0->getOpcode()) { default: break; @@ -5047,8 +5048,16 @@ Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, Value *Y = MinMax->getRHS(); if (ICmpInst::isSigned(Pred) && !MinMax->isSigned()) return nullptr; - if (ICmpInst::isUnsigned(Pred) && MinMax->isSigned()) - return nullptr; + if (ICmpInst::isUnsigned(Pred) && MinMax->isSigned()) { + // Revert the transform signed pred -> unsigned pred + // TODO: We can flip the signedness of predicate if both operands of icmp + // are negative. 
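[Editorial note — not part of the imported patch] The matchSymmetricPair call added to visitCallInst above generalizes the dedicated min/max and swapped-select folds that this patch removes from visitAdd, visitMul and the intrinsic helpers. A hedged C++ sketch of source patterns the generalized matching is meant to cover (function names are illustrative; whether a given fold actually fires depends on the rest of the pipeline):

#include <algorithm>
#include <cstdint>

// min(A, B) + max(A, B) always equals A + B, so the dedicated fold removed
// from visitAdd/visitMul is subsumed by the symmetric-pair matching.
uint32_t sumViaMinMax(uint32_t A, uint32_t B) {
  return std::min(A, B) + std::max(A, B); // expected to simplify to A + B
}

// The same idea when the two operands of a commutative op are the arms of the
// same select in swapped order: op(select(C, X, Y), select(C, Y, X)) == op(X, Y).
uint32_t mulOfSwappedSelects(bool C, uint32_t X, uint32_t Y) {
  uint32_t LHS = C ? X : Y;
  uint32_t RHS = C ? Y : X;
  return LHS * RHS; // expected to simplify to X * Y
}

The foldICmpWithMinMax hunk resumes immediately below.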
+ if (isKnownNonNegative(Z, SQ.getWithInstruction(&I)) && + isKnownNonNegative(MinMax, SQ.getWithInstruction(&I))) { + Pred = ICmpInst::getFlippedSignednessPredicate(Pred); + } else + return nullptr; + } SimplifyQuery Q = SQ.getWithInstruction(&I); auto IsCondKnownTrue = [](Value *Val) -> std::optional<bool> { if (!Val) @@ -6860,6 +6869,57 @@ Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred, return foldICmpAddOpConst(X, *C, Pred); } + // abs(X) >= X --> true + // abs(X) u<= X --> true + // abs(X) < X --> false + // abs(X) u> X --> false + // abs(X) u>= X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN` + // abs(X) <= X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN` + // abs(X) == X --> IsIntMinPosion ? `X > -1`: `X u<= INTMIN` + // abs(X) u< X --> IsIntMinPosion ? `X < 0` : `X > INTMIN` + // abs(X) > X --> IsIntMinPosion ? `X < 0` : `X > INTMIN` + // abs(X) != X --> IsIntMinPosion ? `X < 0` : `X > INTMIN` + { + Value *X; + Constant *C; + if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X), m_Constant(C))) && + match(Op1, m_Specific(X))) { + Value *NullValue = Constant::getNullValue(X->getType()); + Value *AllOnesValue = Constant::getAllOnesValue(X->getType()); + const APInt SMin = + APInt::getSignedMinValue(X->getType()->getScalarSizeInBits()); + bool IsIntMinPosion = C->isAllOnesValue(); + switch (Pred) { + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SGE: + return replaceInstUsesWith(CxtI, ConstantInt::getTrue(CxtI.getType())); + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_SLT: + return replaceInstUsesWith(CxtI, ConstantInt::getFalse(CxtI.getType())); + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_EQ: { + return replaceInstUsesWith( + CxtI, IsIntMinPosion + ? Builder.CreateICmpSGT(X, AllOnesValue) + : Builder.CreateICmpULT( + X, ConstantInt::get(X->getType(), SMin + 1))); + } + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_NE: { + return replaceInstUsesWith( + CxtI, IsIntMinPosion + ? Builder.CreateICmpSLT(X, NullValue) + : Builder.CreateICmpUGT( + X, ConstantInt::get(X->getType(), SMin))); + } + default: + llvm_unreachable("Invalid predicate!"); + } + } + } + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index bdaf7550b4b4..21c61bd99018 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -276,17 +276,15 @@ private: bool transformConstExprCastCall(CallBase &Call); Instruction *transformCallThroughTrampoline(CallBase &Call, IntrinsicInst &Tramp); - Instruction *foldCommutativeIntrinsicOverSelects(IntrinsicInst &II); - // Match a pair of Phi Nodes like - // phi [a, BB0], [b, BB1] & phi [b, BB0], [a, BB1] - // Return the matched two operands. - std::optional<std::pair<Value *, Value *>> - matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS); - - // Tries to fold (op phi(a, b) phi(b, a)) -> (op a, b) - // while op is a commutative intrinsic call. - Instruction *foldCommutativeIntrinsicOverPhis(IntrinsicInst &II); + // Return (a, b) if (LHS, RHS) is known to be (a, b) or (b, a). 
+ // Otherwise, return std::nullopt + // Currently it matches: + // - LHS = (select c, a, b), RHS = (select c, b, a) + // - LHS = (phi [a, BB0], [b, BB1]), RHS = (phi [b, BB0], [a, BB1]) + // - LHS = min(a, b), RHS = max(a, b) + std::optional<std::pair<Value *, Value *>> matchSymmetricPair(Value *LHS, + Value *RHS); Value *simplifyMaskedLoad(IntrinsicInst &II); Instruction *simplifyMaskedStore(IntrinsicInst &II); @@ -502,11 +500,6 @@ public: /// X % (C0 * C1) Value *SimplifyAddWithRemainder(BinaryOperator &I); - // Tries to fold (Binop phi(a, b) phi(b, a)) -> (Binop a, b) - // while Binop is commutative. - Value *SimplifyPhiCommutativeBinaryOp(BinaryOperator &I, Value *LHS, - Value *RHS); - // Binary Op helper for select operations where the expression can be // efficiently reorganized. Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index f0ea3d9fcad5..e7f983a00e30 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -487,13 +487,6 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I)) return Res; - // min(X, Y) * max(X, Y) => X * Y. - if (match(&I, m_CombineOr(m_c_Mul(m_SMax(m_Value(X), m_Value(Y)), - m_c_SMin(m_Deferred(X), m_Deferred(Y))), - m_c_Mul(m_UMax(m_Value(X), m_Value(Y)), - m_c_UMin(m_Deferred(X), m_Deferred(Y)))))) - return BinaryOperator::CreateWithCopiedFlags(Instruction::Mul, X, Y, &I); - // (mul Op0 Op1): // if Log2(Op0) folds away -> // (shl Op1, Log2(Op0)) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 351fc3b0174f..7f2018b3a199 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -411,6 +411,14 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { getComplexity(I.getOperand(1))) Changed = !I.swapOperands(); + if (I.isCommutative()) { + if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) { + replaceOperand(I, 0, Pair->first); + replaceOperand(I, 1, Pair->second); + Changed = true; + } + } + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0)); BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)); @@ -1096,8 +1104,8 @@ Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); } -std::optional<std::pair<Value *, Value *>> -InstCombinerImpl::matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { +static std::optional<std::pair<Value *, Value *>> +matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { if (LHS->getParent() != RHS->getParent()) return std::nullopt; @@ -1123,25 +1131,41 @@ InstCombinerImpl::matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { return std::optional(std::pair(L0, R0)); } -Value *InstCombinerImpl::SimplifyPhiCommutativeBinaryOp(BinaryOperator &I, - Value *Op0, - Value *Op1) { - assert(I.isCommutative() && "Instruction should be commutative"); - - PHINode *LHS = dyn_cast<PHINode>(Op0); - PHINode *RHS = dyn_cast<PHINode>(Op1); - - if (!LHS || !RHS) - return nullptr; - - if (auto P = matchSymmetricPhiNodesPair(LHS, RHS)) { - Value *BI = Builder.CreateBinOp(I.getOpcode(), P->first, P->second); - if (auto *BO = dyn_cast<BinaryOperator>(BI)) - 
BO->copyIRFlags(&I); - return BI; +std::optional<std::pair<Value *, Value *>> +InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) { + Instruction *LHSInst = dyn_cast<Instruction>(LHS); + Instruction *RHSInst = dyn_cast<Instruction>(RHS); + if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode()) + return std::nullopt; + switch (LHSInst->getOpcode()) { + case Instruction::PHI: + return matchSymmetricPhiNodesPair(cast<PHINode>(LHS), cast<PHINode>(RHS)); + case Instruction::Select: { + Value *Cond = LHSInst->getOperand(0); + Value *TrueVal = LHSInst->getOperand(1); + Value *FalseVal = LHSInst->getOperand(2); + if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) && + FalseVal == RHSInst->getOperand(1)) + return std::pair(TrueVal, FalseVal); + return std::nullopt; + } + case Instruction::Call: { + // Match min(a, b) and max(a, b) + MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst); + MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst); + if (LHSMinMax && RHSMinMax && + LHSMinMax->getPredicate() == + ICmpInst::getSwappedPredicate(RHSMinMax->getPredicate()) && + ((LHSMinMax->getLHS() == RHSMinMax->getLHS() && + LHSMinMax->getRHS() == RHSMinMax->getRHS()) || + (LHSMinMax->getLHS() == RHSMinMax->getRHS() && + LHSMinMax->getRHS() == RHSMinMax->getLHS()))) + return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS()); + return std::nullopt; + } + default: + return std::nullopt; } - - return nullptr; } Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, @@ -1187,14 +1211,6 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, }; if (LHSIsSelect && RHSIsSelect && A == D) { - // op(select(%v, %x, %y), select(%v, %y, %x)) --> op(%x, %y) - if (I.isCommutative() && B == F && C == E) { - Value *BI = Builder.CreateBinOp(I.getOpcode(), B, E); - if (auto *BO = dyn_cast<BinaryOperator>(BI)) - BO->copyIRFlags(&I); - return BI; - } - // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F) Cond = A; True = simplifyBinOp(Opcode, B, E, FMF, Q); @@ -1577,11 +1593,6 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { BO.getParent() != Phi1->getParent()) return nullptr; - if (BO.isCommutative()) { - if (Value *V = SimplifyPhiCommutativeBinaryOp(BO, Phi0, Phi1)) - return replaceInstUsesWith(BO, V); - } - // Fold if there is at least one specific constant value in phi0 or phi1's // incoming values that comes from the same block and this specific constant // value can be used to do optimization for specific binary operator. @@ -3197,6 +3208,64 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { return replaceOperand(SI, 0, Op0); } + ConstantInt *SubLHS; + if (match(Cond, m_Sub(m_ConstantInt(SubLHS), m_Value(Op0)))) { + // Change 'switch (1-X) case 1:' into 'switch (X) case 0'. + for (auto Case : SI.cases()) { + Constant *NewCase = ConstantExpr::getSub(SubLHS, Case.getCaseValue()); + assert(isa<ConstantInt>(NewCase) && + "Result of expression should be constant"); + Case.setValue(cast<ConstantInt>(NewCase)); + } + return replaceOperand(SI, 0, Op0); + } + + uint64_t ShiftAmt; + if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) && + ShiftAmt < Op0->getType()->getScalarSizeInBits() && + all_of(SI.cases(), [&](const auto &Case) { + return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt; + })) { + // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'. 
+ OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Cond); + if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() || + Shl->hasOneUse()) { + Value *NewCond = Op0; + if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) { + // If the shift may wrap, we need to mask off the shifted bits. + unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); + NewCond = Builder.CreateAnd( + Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt)); + } + for (auto Case : SI.cases()) { + const APInt &CaseVal = Case.getCaseValue()->getValue(); + APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt) + : CaseVal.lshr(ShiftAmt); + Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase)); + } + return replaceOperand(SI, 0, NewCond); + } + } + + // Fold switch(zext/sext(X)) into switch(X) if possible. + if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) { + bool IsZExt = isa<ZExtInst>(Cond); + Type *SrcTy = Op0->getType(); + unsigned NewWidth = SrcTy->getScalarSizeInBits(); + + if (all_of(SI.cases(), [&](const auto &Case) { + const APInt &CaseVal = Case.getCaseValue()->getValue(); + return IsZExt ? CaseVal.isIntN(NewWidth) + : CaseVal.isSignedIntN(NewWidth); + })) { + for (auto &Case : SI.cases()) { + APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth); + Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase)); + } + return replaceOperand(SI, 0, Op0); + } + } + KnownBits Known = computeKnownBits(Cond, 0, &SI); unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index afb0e6cd1548..e3deafa49bd9 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -174,6 +174,8 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison"; const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared"; const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private"; +const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64"; +const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable"; // Accesses sizes are powers of two: 1, 2, 4, 8, 16. 
static const size_t kNumberOfAccessSizes = 5; @@ -699,6 +701,8 @@ struct AddressSanitizer { Instruction *InsertBefore, Value *Addr, uint32_t TypeStoreSize, bool IsWrite, Value *SizeArgument); + Instruction *genAMDGPUReportBlock(IRBuilder<> &IRB, Value *Cond, + bool Recover); void instrumentUnusualSizeOrAlignment(Instruction *I, Instruction *InsertBefore, Value *Addr, TypeSize TypeStoreSize, bool IsWrite, @@ -1721,6 +1725,30 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress( return InsertBefore; } +Instruction *AddressSanitizer::genAMDGPUReportBlock(IRBuilder<> &IRB, + Value *Cond, bool Recover) { + Module &M = *IRB.GetInsertBlock()->getModule(); + Value *ReportCond = Cond; + if (!Recover) { + auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(), + IRB.getInt1Ty()); + ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond})); + } + + auto *Trm = + SplitBlockAndInsertIfThen(ReportCond, &*IRB.GetInsertPoint(), false, + MDBuilder(*C).createBranchWeights(1, 100000)); + Trm->getParent()->setName("asan.report"); + + if (Recover) + return Trm; + + Trm = SplitBlockAndInsertIfThen(Cond, Trm, false); + IRB.SetInsertPoint(Trm); + return IRB.CreateCall( + M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {}); +} + void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, MaybeAlign Alignment, @@ -1772,7 +1800,15 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, size_t Granularity = 1ULL << Mapping.Scale; Instruction *CrashTerm = nullptr; - if (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity)) { + bool GenSlowPath = (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity)); + + if (TargetTriple.isAMDGCN()) { + if (GenSlowPath) { + auto *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeStoreSize); + Cmp = IRB.CreateAnd(Cmp, Cmp2); + } + CrashTerm = genAMDGPUReportBlock(IRB, Cmp, Recover); + } else if (GenSlowPath) { // We use branch weights for the slow path check, to indicate that the slow // path is rarely taken. This seems to be the case for SPEC benchmarks. Instruction *CheckTerm = SplitBlockAndInsertIfThen( @@ -3629,10 +3665,14 @@ bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, // TODO: We can use vscale_range to convert a scalable value to an // upper bound on the access size. return false; - SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr); - if (!ObjSizeVis.bothKnown(SizeOffset)) return false; - uint64_t Size = SizeOffset.first.getZExtValue(); - int64_t Offset = SizeOffset.second.getSExtValue(); + + SizeOffsetAPInt SizeOffset = ObjSizeVis.compute(Addr); + if (!SizeOffset.bothKnown()) + return false; + + uint64_t Size = SizeOffset.Size.getZExtValue(); + int64_t Offset = SizeOffset.Offset.getSExtValue(); + // Three checks are required to ensure safety: // . Offset >= 0 (since the offset is given from the base ptr) // . 
Size >= Offset (unsigned) diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index ee5b81960417..cfa8ae26c625 100644 --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -61,15 +61,15 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) << " bytes\n"); - SizeOffsetEvalType SizeOffset = ObjSizeEval.compute(Ptr); + SizeOffsetValue SizeOffset = ObjSizeEval.compute(Ptr); - if (!ObjSizeEval.bothKnown(SizeOffset)) { + if (!SizeOffset.bothKnown()) { ++ChecksUnable; return nullptr; } - Value *Size = SizeOffset.first; - Value *Offset = SizeOffset.second; + Value *Size = SizeOffset.Size; + Value *Offset = SizeOffset.Offset; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); Type *IndexTy = DL.getIndexType(Ptr->getType()); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index fe5a0578bd97..a19b14087254 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1189,12 +1189,10 @@ static inline Constant *getFuncAddrForProfData(Function *Fn) { } static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { - // Don't do this for Darwin. compiler-rt uses linker magic. - if (TT.isOSDarwin()) - return false; - // Use linker script magic to get data/cnts/name start/end. - if (TT.isOSAIX() || TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || - TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS() || TT.isOSWindows()) + // compiler-rt uses linker support to get data/counters/name start/end for + // ELF, COFF, Mach-O and XCOFF. + if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() || + TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF()) return false; return true; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3a57709c4e8b..6b95c7028d93 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -330,10 +330,6 @@ extern cl::opt<std::string> ViewBlockFreqFuncName; extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; } // namespace llvm -static cl::opt<bool> - PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden, - cl::desc("Use the old CFG function hashing")); - // Return a string describing the branch condition that can be // used in static branch probability heuristics: static std::string getBranchCondString(Instruction *TI) { @@ -635,34 +631,25 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { JC.update(Indexes); JamCRC JCH; - if (PGOOldCFGHashing) { - // Hash format for context sensitive profile. Reserve 4 bits for other - // information. - FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | - (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | - //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | - (uint64_t)MST.numEdges() << 32 | JC.getCRC(); + // The higher 32 bits. 
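[Annotation] The Size/Offset pair produced above feeds a three-way safety condition. A minimal standalone sketch, assuming the third (elided) check is that the bytes remaining after the offset cover the access size; names are illustrative, not the LLVM ones:

#include <cstdint>

// The access [Offset, Offset + NeededSize) must lie inside the object.
bool accessIsSafe(uint64_t Size, int64_t Offset, uint64_t NeededSize) {
  if (Offset < 0)                                  // offset is from the base pointer
    return false;
  uint64_t Off = static_cast<uint64_t>(Offset);
  return Size >= Off &&                            // object is at least Offset bytes
         Size - Off >= NeededSize;                 // enough room left for the access
}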
+ auto updateJCH = [&JCH](uint64_t Num) { + uint8_t Data[8]; + support::endian::write64le(Data, Num); + JCH.update(Data); + }; + updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); + updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); + updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); + if (BCI) { + updateJCH(BCI->getInstrumentedBlocksHash()); } else { - // The higher 32 bits. - auto updateJCH = [&JCH](uint64_t Num) { - uint8_t Data[8]; - support::endian::write64le(Data, Num); - JCH.update(Data); - }; - updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts()); - updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size()); - updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size()); - if (BCI) { - updateJCH(BCI->getInstrumentedBlocksHash()); - } else { - updateJCH((uint64_t)MST.numEdges()); - } - - // Hash format for context sensitive profile. Reserve 4 bits for other - // information. - FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); + updateJCH((uint64_t)MST.numEdges()); } + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. + FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC(); + // Reserve bit 60-63 for other information purpose. FunctionHash &= 0x0FFFFFFFFFFFFFFF; if (IsCS) @@ -672,10 +659,8 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { << ", Selects = " << SIVisitor.getNumOfSelectInsts() << ", Edges = " << MST.numEdges() << ", ICSites = " << ValueSites[IPVK_IndirectCallTarget].size()); - if (!PGOOldCFGHashing) { - LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() - << ", High32 CRC = " << JCH.getCRC()); - } + LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size() + << ", High32 CRC = " << JCH.getCRC()); LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 06c87bd6dc37..6fec54ac7922 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -517,6 +517,18 @@ static Decomposition decompose(Value *V, return Result; } + // (shl nsw x, shift) is (mul nsw x, (1<<shift)), with the exception of + // shift == bw-1. + if (match(V, m_NSWShl(m_Value(Op0), m_ConstantInt(CI)))) { + uint64_t Shift = CI->getValue().getLimitedValue(); + if (Shift < Ty->getIntegerBitWidth() - 1) { + assert(Shift < 64 && "Would overflow"); + auto Result = decompose(Op0, Preconditions, IsSigned, DL); + Result.mul(int64_t(1) << Shift); + return Result; + } + } + return V; } @@ -644,7 +656,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, // First try to look up \p V in Value2Index and NewVariables. Otherwise add a // new entry to NewVariables. - DenseMap<Value *, unsigned> NewIndexMap; + SmallDenseMap<Value *, unsigned> NewIndexMap; auto GetOrAddIndex = [&Value2Index, &NewVariables, &NewIndexMap](Value *V) -> unsigned { auto V2I = Value2Index.find(V); @@ -668,7 +680,7 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1, IsSigned, IsEq, IsNe); // Collect variables that are known to be positive in all uses in the // constraint. 
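[Annotation] The decompose() change earlier in ConstraintElimination.cpp relies on (shl nsw x, s) being the same value as (mul nsw x, 1 << s) for shifts below bitwidth - 1. A quick self-contained sanity check of that identity on non-wrapping inputs, purely illustrative:

#include <cassert>
#include <cstdint>

int64_t shiftForm(int64_t x, unsigned s) { return x << s; }
int64_t mulForm(int64_t x, unsigned s) { return x * (int64_t(1) << s); }

int main() {
  // Values chosen so neither form wraps, mirroring the nsw assumption.
  for (int64_t x : {0, 1, 7, 1000})
    for (unsigned s : {0u, 1u, 3u, 10u})
      assert(shiftForm(x, s) == mulForm(x, s));
}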
- DenseMap<Value *, bool> KnownNonNegativeVariables; + SmallDenseMap<Value *, bool> KnownNonNegativeVariables; auto &R = Res.Coefficients; for (const auto &KV : VariablesA) { R[GetOrAddIndex(KV.Variable)] += KV.Coefficient; @@ -921,15 +933,20 @@ void State::addInfoForInductions(BasicBlock &BB) { } DomTreeNode *DTN = DT.getNode(InLoopSucc); - auto Inc = SE.getMonotonicPredicateType(AR, CmpInst::ICMP_UGT); - bool MonotonicallyIncreasing = - Inc && *Inc == ScalarEvolution::MonotonicallyIncreasing; - if (MonotonicallyIncreasing) { - // SCEV guarantees that AR does not wrap, so PN >= StartValue can be added - // unconditionally. + auto IncUnsigned = SE.getMonotonicPredicateType(AR, CmpInst::ICMP_UGT); + auto IncSigned = SE.getMonotonicPredicateType(AR, CmpInst::ICMP_SGT); + bool MonotonicallyIncreasingUnsigned = + IncUnsigned && *IncUnsigned == ScalarEvolution::MonotonicallyIncreasing; + bool MonotonicallyIncreasingSigned = + IncSigned && *IncSigned == ScalarEvolution::MonotonicallyIncreasing; + // If SCEV guarantees that AR does not wrap, PN >= StartValue can be added + // unconditionally. + if (MonotonicallyIncreasingUnsigned) WorkList.push_back( FactOrCheck::getConditionFact(DTN, CmpInst::ICMP_UGE, PN, StartValue)); - } + if (MonotonicallyIncreasingSigned) + WorkList.push_back( + FactOrCheck::getConditionFact(DTN, CmpInst::ICMP_SGE, PN, StartValue)); APInt StepOffset; if (auto *C = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) @@ -953,11 +970,17 @@ void State::addInfoForInductions(BasicBlock &BB) { WorkList.push_back(FactOrCheck::getConditionFact( DTN, CmpInst::ICMP_UGE, StartValue, PN, ConditionTy(CmpInst::ICMP_ULE, B, StartValue))); + WorkList.push_back(FactOrCheck::getConditionFact( + DTN, CmpInst::ICMP_SGE, StartValue, PN, + ConditionTy(CmpInst::ICMP_SLE, B, StartValue))); // Add PN > B conditional on B <= StartValue which guarantees that the loop // exits when reaching B with a step of -1. WorkList.push_back(FactOrCheck::getConditionFact( DTN, CmpInst::ICMP_UGT, PN, B, ConditionTy(CmpInst::ICMP_ULE, B, StartValue))); + WorkList.push_back(FactOrCheck::getConditionFact( + DTN, CmpInst::ICMP_SGT, PN, B, + ConditionTy(CmpInst::ICMP_SLE, B, StartValue))); return; } @@ -968,37 +991,31 @@ void State::addInfoForInductions(BasicBlock &BB) { return; if (!StepOffset.isOne()) { - auto *UpperGEP = dyn_cast<GetElementPtrInst>(B); - if (!UpperGEP || UpperGEP->getPointerOperand() != StartValue || - !UpperGEP->isInBounds()) - return; - - MapVector<Value *, APInt> UpperVariableOffsets; - APInt UpperConstantOffset(StepOffset.getBitWidth(), 0); - const DataLayout &DL = BB.getModule()->getDataLayout(); - if (!UpperGEP->collectOffset(DL, StepOffset.getBitWidth(), - UpperVariableOffsets, UpperConstantOffset)) - return; - // All variable offsets and the constant offset have to be a multiple of the - // step. - if (!UpperConstantOffset.urem(StepOffset).isZero() || - any_of(UpperVariableOffsets, [&StepOffset](const auto &P) { - return !P.second.urem(StepOffset).isZero(); - })) + // Check whether B-Start is known to be a multiple of StepOffset. + const SCEV *BMinusStart = SE.getMinusSCEV(SE.getSCEV(B), StartSCEV); + if (isa<SCEVCouldNotCompute>(BMinusStart) || + !SE.getConstantMultiple(BMinusStart).urem(StepOffset).isZero()) return; } // AR may wrap. Add PN >= StartValue conditional on StartValue <= B which // guarantees that the loop exits before wrapping in combination with the // restrictions on B and the step above. 
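[Annotation] The addInfoForInductions changes add signed counterparts (SGE/SGT/SLT facts) alongside the existing unsigned ones. A hypothetical loop meant only as a sketch of intent, not a test from the patch: with i starting at 0 and the new signed facts i >=s 0 and i <s n available, the redundant inner comparison should become provable.

long sumPrefix(const int *a, int n) {
  long s = 0;
  for (int i = 0; i < n; ++i) {
    if (i >= 0)      // expected to fold to true once the signed facts are known
      s += a[i];
  }
  return s;
}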
- if (!MonotonicallyIncreasing) { + if (!MonotonicallyIncreasingUnsigned) WorkList.push_back(FactOrCheck::getConditionFact( DTN, CmpInst::ICMP_UGE, PN, StartValue, ConditionTy(CmpInst::ICMP_ULE, StartValue, B))); - } + if (!MonotonicallyIncreasingSigned) + WorkList.push_back(FactOrCheck::getConditionFact( + DTN, CmpInst::ICMP_SGE, PN, StartValue, + ConditionTy(CmpInst::ICMP_SLE, StartValue, B))); + WorkList.push_back(FactOrCheck::getConditionFact( DTN, CmpInst::ICMP_ULT, PN, B, ConditionTy(CmpInst::ICMP_ULE, StartValue, B))); + WorkList.push_back(FactOrCheck::getConditionFact( + DTN, CmpInst::ICMP_SLT, PN, B, + ConditionTy(CmpInst::ICMP_SLE, StartValue, B))); } void State::addInfoFor(BasicBlock &BB) { @@ -1655,15 +1672,14 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, DFSInStack); } - LLVM_DEBUG(dbgs() << "Processing "); - // For a block, check if any CmpInsts become known based on the current set // of constraints. if (CB.isCheck()) { Instruction *Inst = CB.getInstructionToSimplify(); if (!Inst) continue; - LLVM_DEBUG(dbgs() << "condition to simplify: " << *Inst << "\n"); + LLVM_DEBUG(dbgs() << "Processing condition to simplify: " << *Inst + << "\n"); if (auto *II = dyn_cast<WithOverflowInst>(Inst)) { Changed |= tryToSimplifyOverflowMath(II, Info, ToRemove); } else if (auto *Cmp = dyn_cast<ICmpInst>(Inst)) { @@ -1682,7 +1698,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, } auto AddFact = [&](CmpInst::Predicate Pred, Value *A, Value *B) { - LLVM_DEBUG(dbgs() << "fact to add to the system: "; + LLVM_DEBUG(dbgs() << "Processing fact to add to the system: "; dumpUnpackedICmp(dbgs(), Pred, A, B); dbgs() << "\n"); if (Info.getCS(CmpInst::isSigned(Pred)).size() > MaxRows) { LLVM_DEBUG( @@ -1731,8 +1747,17 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, A = CB.Cond.Op0; B = CB.Cond.Op1; if (CB.DoesHold.Pred != CmpInst::BAD_ICMP_PREDICATE && - !Info.doesHold(CB.DoesHold.Pred, CB.DoesHold.Op0, CB.DoesHold.Op1)) + !Info.doesHold(CB.DoesHold.Pred, CB.DoesHold.Op0, CB.DoesHold.Op1)) { + LLVM_DEBUG({ + dbgs() << "Not adding fact "; + dumpUnpackedICmp(dbgs(), Pred, A, B); + dbgs() << " because precondition "; + dumpUnpackedICmp(dbgs(), CB.DoesHold.Pred, CB.DoesHold.Op0, + CB.DoesHold.Op1); + dbgs() << " does not hold.\n"; + }); continue; + } } else { bool Matched = match(CB.Inst, m_Intrinsic<Intrinsic::assume>( m_ICmp(Pred, m_Value(A), m_Value(B)))); diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index c44d3748a80d..9235850de92f 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -94,6 +94,31 @@ STATISTIC(NumUDivURemsNarrowedExpanded, "Number of bound udiv's/urem's expanded"); STATISTIC(NumZExt, "Number of non-negative deductions"); +static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { + if (Constant *C = LVI->getConstant(V, At)) + return C; + + // TODO: The following really should be sunk inside LVI's core algorithm, or + // at least the outer shims around such. 
+ auto *C = dyn_cast<CmpInst>(V); + if (!C) + return nullptr; + + Value *Op0 = C->getOperand(0); + Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); + if (!Op1) + return nullptr; + + LazyValueInfo::Tristate Result = LVI->getPredicateAt( + C->getPredicate(), Op0, Op1, At, /*UseBlockValue=*/false); + if (Result == LazyValueInfo::Unknown) + return nullptr; + + return (Result == LazyValueInfo::True) + ? ConstantInt::getTrue(C->getContext()) + : ConstantInt::getFalse(C->getContext()); +} + static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition())) return false; @@ -106,7 +131,7 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { C = LVI->getConstantOnEdge(S->getCondition(), PN->getIncomingBlock(U), I->getParent(), I); else - C = LVI->getConstant(S->getCondition(), I); + C = getConstantAt(S->getCondition(), I, LVI); auto *CI = dyn_cast_or_null<ConstantInt>(C); if (!CI) @@ -1109,30 +1134,6 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { return true; } - -static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { - if (Constant *C = LVI->getConstant(V, At)) - return C; - - // TODO: The following really should be sunk inside LVI's core algorithm, or - // at least the outer shims around such. - auto *C = dyn_cast<CmpInst>(V); - if (!C) return nullptr; - - Value *Op0 = C->getOperand(0); - Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); - if (!Op1) return nullptr; - - LazyValueInfo::Tristate Result = LVI->getPredicateAt( - C->getPredicate(), Op0, Op1, At, /*UseBlockValue=*/false); - if (Result == LazyValueInfo::Unknown) - return nullptr; - - return (Result == LazyValueInfo::True) ? - ConstantInt::getTrue(C->getContext()) : - ConstantInt::getFalse(C->getContext()); -} - static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, const SimplifyQuery &SQ) { bool FnChanged = false; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 656abdb0abbf..75cddfa16d6d 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1097,10 +1097,8 @@ private: // For array or vector indices, scale the index by the size of the // type. APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth()); - GEPOffset += - Index * - APInt(Offset.getBitWidth(), - DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue()); + GEPOffset += Index * APInt(Offset.getBitWidth(), + GTI.getSequentialElementStride(DL)); } // If this index has computed an intermediate pointer which is not diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index b8c9d9d100f1..225dd454068c 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -843,7 +843,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, // constant offset to a byte offset, and later offset the remainder of // the original GEP with this byte offset. 
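[Annotation] Returning to the CorrelatedValuePropagation hunk: processSelect now routes non-edge conditions through getConstantAt, so a compare feeding a select condition can be answered by LVI at the use site. A hypothetical shape this is aimed at, illustrative only:

// On the taken path x > 10 holds, so the compare feeding the select is
// expected to evaluate to true and the select to fold to 1.
int pick(int x) {
  if (x > 10) {
    bool c = x > 5;
    return c ? 1 : 2;
  }
  return 0;
}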
AccumulativeByteOffset += - ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType()); + ConstantOffset * GTI.getSequentialElementStride(*DL); } } else if (LowerGEP) { StructType *StTy = GTI.getStructType(); @@ -884,7 +884,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( continue; APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(), - DL->getTypeAllocSize(GTI.getIndexedType())); + GTI.getSequentialElementStride(*DL)); // Scale the index by element size. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { @@ -946,7 +946,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, continue; APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), - DL->getTypeAllocSize(GTI.getIndexedType())); + GTI.getSequentialElementStride(*DL)); // Scale the index by element size. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 543469d62fe7..ca1f3a0c0ae3 100644 --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -547,7 +547,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( // indices except this current one. const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs); Value *ArrayIdx = GEP->getOperand(I); - uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + uint64_t ElementSize = GTI.getSequentialElementStride(*DL); if (ArrayIdx->getType()->getIntegerBitWidth() <= DL->getIndexSizeInBits(GEP->getAddressSpace())) { // Skip factoring if ArrayIdx is wider than the index size, because diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 0990c750af55..ea3135630665 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -33,37 +33,37 @@ STATISTIC(NumVFDeclAdded, STATISTIC(NumCompUsedAdded, "Number of `@llvm.compiler.used` operands that have been added."); -/// A helper function that adds the vector function declaration that -/// vectorizes the CallInst CI with a vectorization factor of VF -/// lanes. The TLI assumes that all parameters and the return type of -/// CI (other than void) need to be widened to a VectorType of VF -/// lanes. +/// A helper function that adds the vector variant declaration for vectorizing +/// the CallInst \p CI with a vectorization factor of \p VF lanes. For each +/// mapping, TLI provides a VABI prefix, which contains all information required +/// to create vector function declaration. static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, - bool Predicate, const StringRef VFName) { + const VecDesc *VD) { Module *M = CI.getModule(); + FunctionType *ScalarFTy = CI.getFunctionType(); - // Add function declaration. 
- Type *RetTy = ToVectorTy(CI.getType(), VF); - SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI.args()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); - assert(!CI.getFunctionType()->isVarArg() && - "VarArg functions are not supported."); - if (Predicate) - Tys.push_back(ToVectorTy(Type::getInt1Ty(RetTy->getContext()), VF)); - FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false); - Function *VectorF = - Function::Create(FTy, Function::ExternalLinkage, VFName, M); - VectorF->copyAttributesFrom(CI.getCalledFunction()); + assert(!ScalarFTy->isVarArg() && "VarArg functions are not supported."); + + const std::optional<VFInfo> Info = VFABI::tryDemangleForVFABI( + VD->getVectorFunctionABIVariantString(), ScalarFTy); + + assert(Info && "Failed to demangle vector variant"); + assert(Info->Shape.VF == VF && "Mangled name does not match VF"); + + const StringRef VFName = VD->getVectorFnName(); + FunctionType *VectorFTy = VFABI::createFunctionType(*Info, ScalarFTy); + Function *VecFunc = + Function::Create(VectorFTy, Function::ExternalLinkage, VFName, M); + VecFunc->copyAttributesFrom(CI.getCalledFunction()); ++NumVFDeclAdded; LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName - << "` of type " << *(VectorF->getType()) << "\n"); + << "` of type " << *VectorFTy << "\n"); // Make function declaration (without a body) "sticky" in the IR by // listing it in the @llvm.compiler.used intrinsic. - assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` " + assert(!VecFunc->size() && "VFABI attribute requires `@llvm.compiler.used` " "only on declarations."); - appendToCompilerUsed(*M, {VectorF}); + appendToCompilerUsed(*M, {VecFunc}); LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName << "` to `@llvm.compiler.used`.\n"); ++NumCompUsedAdded; @@ -100,7 +100,7 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { } Function *VariantF = M->getFunction(VD->getVectorFnName()); if (!VariantF) - addVariantDeclaration(CI, VF, Predicate, VD->getVectorFnName()); + addVariantDeclaration(CI, VF, VD); } }; diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index ab95698abc43..3dc6016a0a37 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -310,6 +310,7 @@ bool SCCPSolver::removeNonFeasibleEdges(BasicBlock *BB, DomTreeUpdater &DTU, new UnreachableInst(DefaultDest->getContext(), NewUnreachableBB); } + DefaultDest->removePredecessor(BB); SI->setDefaultDest(NewUnreachableBB); Updates.push_back({DominatorTree::Delete, BB, DefaultDest}); Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB}); @@ -1063,14 +1064,17 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, // is ready. if (SCValue.isConstantRange(/*UndefAllowed=*/false)) { const ConstantRange &Range = SCValue.getConstantRange(); + unsigned ReachableCaseCount = 0; for (const auto &Case : SI->cases()) { const APInt &CaseValue = Case.getCaseValue()->getValue(); - if (Range.contains(CaseValue)) + if (Range.contains(CaseValue)) { Succs[Case.getSuccessorIndex()] = true; + ++ReachableCaseCount; + } } - // TODO: Determine whether default case is reachable. 
- Succs[SI->case_default()->getSuccessorIndex()] = true; + Succs[SI->case_default()->getSuccessorIndex()] = + Range.isSizeLargerThan(ReachableCaseCount); return; } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 55e375670cc6..61d891d65346 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5414,11 +5414,13 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { } static void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { + DomTreeUpdater *DTU, + bool RemoveOrigDefaultBlock = true) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); auto *BB = Switch->getParent(); auto *OrigDefaultBlock = Switch->getDefaultDest(); - OrigDefaultBlock->removePredecessor(BB); + if (RemoveOrigDefaultBlock) + OrigDefaultBlock->removePredecessor(BB); BasicBlock *NewDefaultBlock = BasicBlock::Create( BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), OrigDefaultBlock); @@ -5427,7 +5429,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch, if (DTU) { SmallVector<DominatorTree::UpdateType, 2> Updates; Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (!is_contained(successors(BB), OrigDefaultBlock)) + if (RemoveOrigDefaultBlock && + !is_contained(successors(BB), OrigDefaultBlock)) Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); DTU->applyUpdates(Updates); } @@ -5609,10 +5612,28 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, Known.getBitWidth() - (Known.Zero | Known.One).popcount(); assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && - NumUnknownBits < 64 /* avoid overflow */ && - SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI, DTU); - return true; + NumUnknownBits < 64 /* avoid overflow */) { + uint64_t AllNumCases = 1ULL << NumUnknownBits; + if (SI->getNumCases() == AllNumCases) { + createUnreachableSwitchDefault(SI, DTU); + return true; + } + // When only one case value is missing, replace default with that case. + // Eliminating the default branch will provide more opportunities for + // optimization, such as lookup tables. + if (SI->getNumCases() == AllNumCases - 1) { + assert(NumUnknownBits > 1 && "Should be canonicalized to a branch"); + uint64_t MissingCaseVal = 0; + for (const auto &Case : SI->cases()) + MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue(); + auto *MissingCase = + cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal)); + SwitchInstProfUpdateWrapper SIW(*SI); + SIW.addCase(MissingCase, SI->getDefaultDest(), SIW.getSuccessorWeight(0)); + createUnreachableSwitchDefault(SI, DTU, /*RemoveOrigDefaultBlock*/ false); + SIW.setSuccessorWeight(0, 0); + return true; + } } if (DeadCases.empty()) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 760a626c8b6f..a7cd68e860e4 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -3735,26 +3735,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) { // Also try to simplify calls to fortified library functions. if (Value *SimplifiedFortifiedCI = - FortifiedSimplifier.optimizeCall(CI, Builder)) { - // Try to further simplify the result. 
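[Annotation] Both the SCCP change (the default successor is feasible only when the value range holds more values than the reachable cases) and the new eliminateDeadSwitchCases rule are about proving or synthesizing an unreachable default. A hypothetical shape for the one-missing-case rule, not from the patch's tests:

// x & 3 has two unknown bits and three of the four values have explicit
// cases, so the default is expected to be rewritten as 'case 3' and the
// switch given an unreachable default, which in turn enables lookup tables.
int lookup(unsigned x) {
  switch (x & 3) {
  case 0: return 10;
  case 1: return 20;
  case 2: return 30;
  default: return 40;
  }
}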
- CallInst *SimplifiedCI = dyn_cast<CallInst>(SimplifiedFortifiedCI); - if (SimplifiedCI && SimplifiedCI->getCalledFunction()) { - // Ensure that SimplifiedCI's uses are complete, since some calls have - // their uses analyzed. - replaceAllUsesWith(CI, SimplifiedCI); - - // Set insertion point to SimplifiedCI to guarantee we reach all uses - // we might replace later on. - IRBuilderBase::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(SimplifiedCI); - if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) { - // If we were able to further simplify, remove the now redundant call. - substituteInParent(SimplifiedCI, V); - return V; - } - } + FortifiedSimplifier.optimizeCall(CI, Builder)) return SimplifiedFortifiedCI; - } // Then check for known library functions. if (TLI->getLibFunc(*Callee, Func) && isLibFuncEmittable(M, TLI, Func)) { diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index fa2459d1ca02..1f11d4894f77 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1193,7 +1193,7 @@ std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs( OpA->getType() != OpB->getType()) return std::nullopt; - uint64_t Stride = DL.getTypeAllocSize(GTIA.getIndexedType()); + uint64_t Stride = GTIA.getSequentialElementStride(DL); // Only look through a ZExt/SExt. if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA)) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 577ce8000de2..cff72ae263d8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -167,9 +167,14 @@ public: } VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, - DebugLoc DL, const Twine &Name = "") { - return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL, - Name); + DebugLoc DL, const Twine &Name = "", + std::optional<FastMathFlags> FMFs = std::nullopt) { + auto *Select = + FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + *FMFs, DL, Name) + : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal}, + DL, Name); + return tryInsertInstruction(Select); } /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A @@ -341,16 +346,20 @@ public: /// Return the best VPlan for \p VF. VPlan &getBestPlanFor(ElementCount VF) const; - /// Generate the IR code for the body of the vectorized loop according to the - /// best selected \p VF, \p UF and VPlan \p BestPlan. + /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan + /// according to the best selected \p VF and \p UF. + /// /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue /// vectorization re-using plans for both the main and epilogue vector loops. /// It should be removed once the re-use issue has been fixed. /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop - /// to re-use expansion results generated during main plan execution. Returns - /// a mapping of SCEVs to their expanded IR values. Note that this is a - /// temporary workaround needed due to the current epilogue handling. - DenseMap<const SCEV *, Value *> + /// to re-use expansion results generated during main plan execution. 
+ /// + /// Returns a mapping of SCEVs to their expanded IR values and a mapping for + /// the reduction resume values. Note that this is a temporary workaround + /// needed due to the current epilogue handling. + std::pair<DenseMap<const SCEV *, Value *>, + DenseMap<const RecurrenceDescriptor *, Value *>> executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool IsEpilogueVectorization, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8e135d80f4f2..51ce88480c08 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -584,10 +584,6 @@ public: /// able to vectorize with strict in-order reductions for the given RdxDesc. bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc); - // Returns the resume value (bc.merge.rdx) for a reduction as - // generated by fixReduction. - PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc); - /// Create a new phi node for the induction variable \p OrigPhi to resume /// iteration count in the scalar epilogue, from where the vectorized loop /// left off. \p Step is the SCEV-expanded induction step to use. In cases @@ -626,9 +622,6 @@ protected: BasicBlock *MiddleBlock, BasicBlock *VectorHeader, VPlan &Plan, VPTransformState &State); - /// Handle all cross-iteration phis in the header. - void fixCrossIterationPHIs(VPTransformState &State); - /// Create the exit value of first order recurrences in the middle block and /// update their users. void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR, @@ -1166,14 +1159,6 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes( } } -PHINode *InnerLoopVectorizer::getReductionResumeValue( - const RecurrenceDescriptor &RdxDesc) { - auto It = ReductionResumeValues.find(&RdxDesc); - assert(It != ReductionResumeValues.end() && - "Expected to find a resume value for the reduction."); - return It->second; -} - namespace llvm { // Loop vectorization cost-model hints how the scalar epilogue loop should be @@ -3434,8 +3419,15 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, // At this point every instruction in the original loop is widened to a // vector form. Now we need to fix the recurrences in the loop. These PHI // nodes are currently empty because we did not want to introduce cycles. - // This is the second stage of vectorizing recurrences. - fixCrossIterationPHIs(State); + // This is the second stage of vectorizing recurrences. Note that fixing + // reduction phis are already modeled in VPlan. + // TODO: Also model fixing fixed-order recurrence phis in VPlan. + VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); + VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); + for (VPRecipeBase &R : HeaderVPBB->phis()) { + if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) + fixFixedOrderRecurrence(FOR, State); + } // Forget the original basic block. 
PSE.getSE()->forgetLoop(OrigLoop); @@ -3450,7 +3442,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (PHINode &PN : Exit->phis()) PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); - VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock(); + VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock(); Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); if (Cost->requiresScalarEpilogue(VF.isVector())) { // No edge from the middle block to the unique exit block has been inserted @@ -3503,27 +3495,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, VF.getKnownMinValue() * UF); } -void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) { - // In order to support recurrences we need to be able to vectorize Phi nodes. - // Phi nodes have cycles, so we need to vectorize them in two stages. This is - // stage #2: We now need to fix the recurrences by adding incoming edges to - // the currently empty PHI nodes. At this point every instruction in the - // original loop is widened to a vector form so we can use them to construct - // the incoming edges. - VPBasicBlock *Header = - State.Plan->getVectorLoopRegion()->getEntryBasicBlock(); - - for (VPRecipeBase &R : Header->phis()) { - if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) - fixReduction(ReductionPhi, State); - } - - for (VPRecipeBase &R : Header->phis()) { - if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) - fixFixedOrderRecurrence(FOR, State); - } -} - void InnerLoopVectorizer::fixFixedOrderRecurrence( VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) { // This is the second phase of vectorizing first-order recurrences. An @@ -3643,169 +3614,6 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence( Phi->setName("scalar.recur"); } -void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, - VPTransformState &State) { - PHINode *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); - // Get it's reduction variable descriptor. - assert(Legal->isReductionVariable(OrigPhi) && - "Unable to find the reduction variable"); - const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - - RecurKind RK = RdxDesc.getRecurrenceKind(); - TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue(); - Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); - if (auto *I = dyn_cast<Instruction>(&*ReductionStartValue)) - State.setDebugLocFrom(I->getDebugLoc()); - - VPValue *LoopExitInstDef = PhiR->getBackedgeValue(); - - // Before each round, move the insertion point right between - // the PHIs and the values we are going to write. - // This allows us to write both PHINodes and the extractelement - // instructions. - Builder.SetInsertPoint(LoopMiddleBlock, - LoopMiddleBlock->getFirstInsertionPt()); - - State.setDebugLocFrom(LoopExitInst->getDebugLoc()); - - Type *PhiTy = OrigPhi->getType(); - // If tail is folded by masking, the vector value to leave the loop should be - // a Select choosing between the vectorized LoopExitInst and vectorized Phi, - // instead of the former. For an inloop reduction the reduction will already - // be predicated, and does not need to be handled here. 
- if (Cost->foldTailByMasking() && !PhiR->isInLoop()) { - VPValue *Def = nullptr; - for (VPUser *U : LoopExitInstDef->users()) { - auto *S = dyn_cast<VPInstruction>(U); - if (S && S->getOpcode() == Instruction::Select) { - Def = S; - break; - } - } - if (Def) - LoopExitInstDef = Def; - } - - VectorParts RdxParts(UF); - for (unsigned Part = 0; Part < UF; ++Part) - RdxParts[Part] = State.get(LoopExitInstDef, Part); - - // If the vector reduction can be performed in a smaller type, we truncate - // then extend the loop exit value to enable InstCombine to evaluate the - // entire expression in the smaller type. - if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { - Builder.SetInsertPoint(LoopMiddleBlock, - LoopMiddleBlock->getFirstInsertionPt()); - Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); - for (unsigned Part = 0; Part < UF; ++Part) { - RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); - } - } - - // Reduce all of the unrolled parts into a single vector. - Value *ReducedPartRdx = RdxParts[0]; - unsigned Op = RecurrenceDescriptor::getOpcode(RK); - - // The middle block terminator has already been assigned a DebugLoc here (the - // OrigLoop's single latch terminator). We want the whole middle block to - // appear to execute on this line because: (a) it is all compiler generated, - // (b) these instructions are always executed after evaluating the latch - // conditional branch, and (c) other passes may add new predecessors which - // terminate on this line. This is the easiest way to ensure we don't - // accidentally cause an extra step back into the loop while debugging. - State.setDebugLocFrom(LoopMiddleBlock->getTerminator()->getDebugLoc()); - if (PhiR->isOrdered()) - ReducedPartRdx = RdxParts[UF - 1]; - else { - // Floating-point operations should have some FMF to enable the reduction. - IRBuilderBase::FastMathFlagGuard FMFG(Builder); - Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); - for (unsigned Part = 1; Part < UF; ++Part) { - Value *RdxPart = RdxParts[Part]; - if (Op != Instruction::ICmp && Op != Instruction::FCmp) - ReducedPartRdx = Builder.CreateBinOp( - (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); - else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) - ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK, - ReducedPartRdx, RdxPart); - else - ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); - } - } - - // Create the reduction after the loop. Note that inloop reductions create the - // target reduction in the loop using a Reduction recipe. - if (VF.isVector() && !PhiR->isInLoop()) { - ReducedPartRdx = - createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); - // If the reduction can be performed in a smaller type, we need to extend - // the reduction to the wider type before we branch to the original loop. - if (PhiTy != RdxDesc.getRecurrenceType()) - ReducedPartRdx = RdxDesc.isSigned() - ? Builder.CreateSExt(ReducedPartRdx, PhiTy) - : Builder.CreateZExt(ReducedPartRdx, PhiTy); - } - - PHINode *ResumePhi = - dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue()); - - // Create a phi node that merges control-flow from the backedge-taken check - // block and the middle block. - PHINode *BCBlockPhi = PHINode::Create(PhiTy, 2, "bc.merge.rdx", - LoopScalarPreHeader->getTerminator()); - - // If we are fixing reductions in the epilogue loop then we should already - // have created a bc.merge.rdx Phi after the main vector body. 
Ensure that - // we carry over the incoming values correctly. - for (auto *Incoming : predecessors(LoopScalarPreHeader)) { - if (Incoming == LoopMiddleBlock) - BCBlockPhi->addIncoming(ReducedPartRdx, Incoming); - else if (ResumePhi && llvm::is_contained(ResumePhi->blocks(), Incoming)) - BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming), - Incoming); - else - BCBlockPhi->addIncoming(ReductionStartValue, Incoming); - } - - // Set the resume value for this reduction - ReductionResumeValues.insert({&RdxDesc, BCBlockPhi}); - - // If there were stores of the reduction value to a uniform memory address - // inside the loop, create the final store here. - if (StoreInst *SI = RdxDesc.IntermediateStore) { - StoreInst *NewSI = - Builder.CreateAlignedStore(ReducedPartRdx, SI->getPointerOperand(), - SI->getAlign()); - propagateMetadata(NewSI, SI); - - // If the reduction value is used in other places, - // then let the code below create PHI's for that. - } - - // Now, we need to fix the users of the reduction variable - // inside and outside of the scalar remainder loop. - - // We know that the loop is in LCSSA form. We need to update the PHI nodes - // in the exit blocks. See comment on analogous loop in - // fixFixedOrderRecurrence for a more complete explaination of the logic. - if (!Cost->requiresScalarEpilogue(VF.isVector())) - for (PHINode &LCSSAPhi : LoopExitBlock->phis()) - if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) { - LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock); - State.Plan->removeLiveOut(&LCSSAPhi); - } - - // Fix the scalar loop reduction variable with the incoming reduction sum - // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = - OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch()); - assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); - // Pick the other block. - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); - OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); - OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); -} - void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { // The basic block and loop containing the predicated instruction. auto *PredBB = PredInst->getParent(); @@ -5579,21 +5387,45 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor; } - // If trip count is known or estimated compile time constant, limit the - // interleave count to be less than the trip count divided by VF, provided it - // is at least 1. - // - // For scalable vectors we can't know if interleaving is beneficial. It may - // not be beneficial for small loops if none of the lanes in the second vector - // iterations is enabled. However, for larger loops, there is likely to be a - // similar benefit as for fixed-width vectors. For now, we choose to leave - // the InterleaveCount as if vscale is '1', although if some information about - // the vector is known (e.g. min vector size), we can make a better decision. - if (BestKnownTC) { - MaxInterleaveCount = - std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount); - // Make sure MaxInterleaveCount is greater than 0. 
- MaxInterleaveCount = std::max(1u, MaxInterleaveCount); + unsigned EstimatedVF = VF.getKnownMinValue(); + if (VF.isScalable()) { + if (std::optional<unsigned> VScale = getVScaleForTuning(TheLoop, TTI)) + EstimatedVF *= *VScale; + } + assert(EstimatedVF >= 1 && "Estimated VF shouldn't be less than 1"); + + unsigned KnownTC = PSE.getSE()->getSmallConstantTripCount(TheLoop); + if (KnownTC) { + // If trip count is known we select between two prospective ICs, where + // 1) the aggressive IC is capped by the trip count divided by VF + // 2) the conservative IC is capped by the trip count divided by (VF * 2) + // The final IC is selected in a way that the epilogue loop trip count is + // minimized while maximizing the IC itself, so that we either run the + // vector loop at least once if it generates a small epilogue loop, or else + // we run the vector loop at least twice. + + unsigned InterleaveCountUB = bit_floor( + std::max(1u, std::min(KnownTC / EstimatedVF, MaxInterleaveCount))); + unsigned InterleaveCountLB = bit_floor(std::max( + 1u, std::min(KnownTC / (EstimatedVF * 2), MaxInterleaveCount))); + MaxInterleaveCount = InterleaveCountLB; + + if (InterleaveCountUB != InterleaveCountLB) { + unsigned TailTripCountUB = (KnownTC % (EstimatedVF * InterleaveCountUB)); + unsigned TailTripCountLB = (KnownTC % (EstimatedVF * InterleaveCountLB)); + // If both produce same scalar tail, maximize the IC to do the same work + // in fewer vector loop iterations + if (TailTripCountUB == TailTripCountLB) + MaxInterleaveCount = InterleaveCountUB; + } + } else if (BestKnownTC) { + // If trip count is an estimated compile time constant, limit the + // IC to be capped by the trip count divided by VF * 2, such that the vector + // loop runs at least twice to make interleaving seem profitable when there + // is an epilogue loop present. Since exact Trip count is not known we + // choose to be conservative in our IC estimate. + MaxInterleaveCount = bit_floor(std::max( + 1u, std::min(*BestKnownTC / (EstimatedVF * 2), MaxInterleaveCount))); } assert(MaxInterleaveCount > 0 && @@ -7585,7 +7417,65 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) { } } -SCEV2ValueTy LoopVectorizationPlanner::executePlan( +// Check if \p RedResult is a ComputeReductionResult instruction, and if it is +// create a merge phi node for it and add it to \p ReductionResumeValues. +static void createAndCollectMergePhiForReduction( + VPInstruction *RedResult, + DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues, + VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock) { + if (!RedResult || + RedResult->getOpcode() != VPInstruction::ComputeReductionResult) + return; + + auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0)); + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + + TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue(); + Value *FinalValue = + State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane())); + auto *ResumePhi = + dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue()); + + // TODO: bc.merge.rdx should not be created here, instead it should be + // modeled in VPlan. + BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader(); + // Create a phi node that merges control-flow from the backedge-taken check + // block and the middle block. 
+ auto *BCBlockPhi = PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx", + LoopScalarPreHeader->getTerminator()); + + // If we are fixing reductions in the epilogue loop then we should already + // have created a bc.merge.rdx Phi after the main vector body. Ensure that + // we carry over the incoming values correctly. + for (auto *Incoming : predecessors(LoopScalarPreHeader)) { + if (Incoming == LoopMiddleBlock) + BCBlockPhi->addIncoming(FinalValue, Incoming); + else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming)) + BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming), + Incoming); + else + BCBlockPhi->addIncoming(ReductionStartValue, Incoming); + } + + auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); + // TODO: This fixup should instead be modeled in VPlan. + // Fix the scalar loop reduction variable with the incoming reduction sum + // from the vector body and from the backedge value. + int IncomingEdgeBlockIdx = + OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch()); + assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); + // Pick the other block. + int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); + OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); + Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); + OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); + + ReductionResumeValues[&RdxDesc] = BCBlockPhi; +} + +std::pair<DenseMap<const SCEV *, Value *>, + DenseMap<const RecurrenceDescriptor *, Value *>> +LoopVectorizationPlanner::executePlan( ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization, const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) { @@ -7664,6 +7554,17 @@ SCEV2ValueTy LoopVectorizationPlanner::executePlan( BestVPlan.execute(&State); + // 2.5 Collect reduction resume values. + DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues; + auto *ExitVPBB = + cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); + for (VPRecipeBase &R : *ExitVPBB) { + createAndCollectMergePhiForReduction(dyn_cast<VPInstruction>(&R), + ReductionResumeValues, State, OrigLoop, + State.CFG.VPBB2IRBB[ExitVPBB]); + } + + // 2.6. Maintain Loop Hints // Keep all loop hints from the original loop on the vector loop (we'll // replace the vectorizer-specific hints below). MDNode *OrigLoopID = OrigLoop->getLoopID(); @@ -7697,7 +7598,7 @@ SCEV2ValueTy LoopVectorizationPlanner::executePlan( ILV.printDebugTracesAtEnd(); - return State.ExpandedSCEVs; + return {State.ExpandedSCEVs, ReductionResumeValues}; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -8046,7 +7947,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, if (ECEntryIt != EdgeMaskCache.end()) return ECEntryIt->second; - VPValue *SrcMask = createBlockInMask(Src, Plan); + VPValue *SrcMask = getBlockInMask(Src); // The terminator has to be a branch inst! BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator()); @@ -8108,14 +8009,17 @@ void VPRecipeBuilder::createHeaderMask(VPlan &Plan) { BlockMaskCache[Header] = BlockMask; } -VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { - assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); - - // Look for cached value. - BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB); - if (BCEntryIt != BlockMaskCache.end()) - return BCEntryIt->second; +VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const { + // Return the cached value. 
+ BlockMaskCacheTy::const_iterator BCEntryIt = BlockMaskCache.find(BB); + assert(BCEntryIt != BlockMaskCache.end() && + "Trying to access mask for block without one."); + return BCEntryIt->second; +} +void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { + assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); + assert(BlockMaskCache.count(BB) == 0 && "Mask for block already computed"); assert(OrigLoop->getHeader() != BB && "Loop header must have cached block mask"); @@ -8125,8 +8029,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { // This is the block mask. We OR all incoming edges. for (auto *Predecessor : predecessors(BB)) { VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan); - if (!EdgeMask) // Mask of predecessor is all-one so mask of block is too. - return BlockMaskCache[BB] = EdgeMask; + if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too. + BlockMaskCache[BB] = EdgeMask; + } if (!BlockMask) { // BlockMask has its initialized nullptr value. BlockMask = EdgeMask; @@ -8136,7 +8041,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) { BlockMask = Builder.createOr(BlockMask, EdgeMask, {}); } - return BlockMaskCache[BB] = BlockMask; + BlockMaskCache[BB] = BlockMask; } VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, @@ -8164,7 +8069,7 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VPValue *Mask = nullptr; if (Legal->isMaskRequired(I)) - Mask = createBlockInMask(I->getParent(), *Plan); + Mask = getBlockInMask(I->getParent()); // Determine if the pointer operand of the access is either consecutive or // reverse consecutive. @@ -8176,8 +8081,11 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1]; if (Consecutive) { - auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), - Reverse, I->getDebugLoc()); + auto *GEP = dyn_cast<GetElementPtrInst>( + Ptr->getUnderlyingValue()->stripPointerCasts()); + auto *VectorPtr = new VPVectorPointerRecipe( + Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false, + I->getDebugLoc()); Builder.getInsertBlock()->appendRecipe(VectorPtr); Ptr = VectorPtr; } @@ -8383,7 +8291,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, // all-true mask. VPValue *Mask = nullptr; if (Legal->isMaskRequired(CI)) - Mask = createBlockInMask(CI->getParent(), *Plan); + Mask = getBlockInMask(CI->getParent()); else Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue( IntegerType::getInt1Ty(Variant->getFunctionType()->getContext()))); @@ -8426,7 +8334,7 @@ VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I, // div/rem operation itself. Otherwise fall through to general handling below. if (CM.isPredicatedInst(I)) { SmallVector<VPValue *> Ops(Operands.begin(), Operands.end()); - VPValue *Mask = createBlockInMask(I->getParent(), *Plan); + VPValue *Mask = getBlockInMask(I->getParent()); VPValue *One = Plan->getVPValueOrAddLiveIn( ConstantInt::get(I->getType(), 1u, false)); auto *SafeRHS = @@ -8520,7 +8428,7 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, // added initially. Masked replicate recipes will later be placed under an // if-then construct to prevent side-effects. Generate recipes to compute // the block mask for this region. 
- BlockInMask = createBlockInMask(I->getParent(), Plan); + BlockInMask = getBlockInMask(I->getParent()); } auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()), @@ -8755,16 +8663,16 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { bool HasNUW = Style == TailFoldingStyle::None; addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL); - // Proactively create header mask. Masks for other blocks are created on - // demand. - RecipeBuilder.createHeaderMask(*Plan); - // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); VPBasicBlock *VPBB = HeaderVPBB; + bool NeedsMasks = CM.foldTailByMasking() || + any_of(OrigLoop->blocks(), [this](BasicBlock *BB) { + return Legal->blockNeedsPredication(BB); + }); for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. @@ -8772,6 +8680,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPBB->setName(BB->getName()); Builder.setInsertPoint(VPBB); + if (VPBB == HeaderVPBB) + RecipeBuilder.createHeaderMask(*Plan); + else if (NeedsMasks) + RecipeBuilder.createBlockInMask(BB, *Plan); + // Introduce each ingredient into VPlan. // TODO: Model and preserve debug intrinsics in VPlan. for (Instruction &I : drop_end(BB->instructionsWithoutDebug(false))) { @@ -8977,10 +8890,15 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // to reductions, with one operand being vector and the other being the scalar // reduction chain. For other reductions, a select is introduced between the phi // and live-out recipes when folding the tail. +// +// A ComputeReductionResult recipe is added to the middle block, also for +// in-loop reductions which compute their result in-loop, because generating +// the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes. void LoopVectorizationPlanner::adjustRecipesForReductions( VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) { - VPBasicBlock *Header = Plan->getVectorLoopRegion()->getEntryBasicBlock(); + VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion(); + VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock(); // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores // sank outside of the loop would keep the same order as they had in the // original loop. 
@@ -9020,15 +8938,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( for (VPRecipeBase *R : ReductionPHIList) R->moveBefore(*Header, Header->getFirstNonPhi()); - SmallVector<VPReductionPHIRecipe *> InLoopReductionPhis; for (VPRecipeBase &R : Header->phis()) { auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered())) continue; - InLoopReductionPhis.push_back(PhiR); - } - for (VPReductionPHIRecipe *PhiR : InLoopReductionPhis) { const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); RecurKind Kind = RdxDesc.getRecurrenceKind(); assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && @@ -9119,7 +9033,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (CM.blockNeedsPredicationForAnyReason(BB)) { VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(CurrentLink); - CondOp = RecipeBuilder.createBlockInMask(BB, *Plan); + CondOp = RecipeBuilder.getBlockInMask(BB); } VPReductionRecipe *RedRecipe = new VPReductionRecipe( @@ -9137,36 +9051,38 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( for (VPRecipeBase &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); - if (!PhiR || PhiR->isInLoop()) + if (!PhiR) continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe(); // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. - if (CM.foldTailByMasking()) { - VPValue *Cond = - RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan); - VPValue *Red = PhiR->getBackedgeValue(); - assert(Red->getDefiningRecipe()->getParent() != LatchVPBB && + auto *OrigExitingVPV = PhiR->getBackedgeValue(); + auto *NewExitingVPV = PhiR->getBackedgeValue(); + if (!PhiR->isInLoop() && CM.foldTailByMasking()) { + VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader()); + assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB && "reduction recipe must be defined before latch"); - FastMathFlags FMFs = RdxDesc.getFastMathFlags(); Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType(); - Result = + std::optional<FastMathFlags> FMFs = PhiTy->isFloatingPointTy() - ? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs) - : new VPInstruction(Instruction::Select, {Cond, Red, PhiR}); - Result->insertBefore(&*Builder.getInsertPoint()); - Red->replaceUsesWithIf( - Result->getVPSingleValue(), - [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); + ? std::make_optional(RdxDesc.getFastMathFlags()) + : std::nullopt; + NewExitingVPV = + Builder.createSelect(Cond, OrigExitingVPV, PhiR, {}, "", FMFs); + OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) { + return isa<VPInstruction>(&U) && + cast<VPInstruction>(&U)->getOpcode() == + VPInstruction::ComputeReductionResult; + }); if (PreferPredicatedReductionSelect || TTI.preferPredicatedReductionSelect( PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) - PhiR->setOperand(1, Result->getVPSingleValue()); + PhiR->setOperand(1, NewExitingVPV); } + // If the vector reduction can be performed in a smaller type, we truncate // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. 
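For context on the tail-folding change in the hunk above: when the tail is folded by masking, the value fed back into the reduction phi (and later consumed by compute-reduction-result) becomes a select between the freshly computed partial sum and the previous accumulator, so masked-off lanes keep their old value. Below is a minimal hand-written LLVM IR sketch of that shape; it is illustrative only, assuming a simple i32 add reduction over a 4-byte-aligned buffer, with VF = 4 and value names chosen for clarity rather than taken from this commit.

define i32 @tail_folded_sum(ptr %p, i64 %n) {
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %rdx.select, %vector.body ]
  ; header mask: lanes with index + lane < %n are active
  %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %n)
  %gep = getelementptr inbounds i32, ptr %p, i64 %index
  %wide.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %gep, i32 4, <4 x i1> %mask, <4 x i32> poison)
  %add = add <4 x i32> %vec.phi, %wide.load
  ; select(mask, new value, phi): masked-off lanes keep the running sum
  %rdx.select = select <4 x i1> %mask, <4 x i32> %add, <4 x i32> %vec.phi
  %index.next = add i64 %index, 4
  %done = icmp uge i64 %index.next, %n
  br i1 %done, label %middle.block, label %vector.body

middle.block:
  %rdx = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %rdx.select)
  ret i32 %rdx
}

declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

Without that select, the masked-off lanes of the last iteration would feed undefined values into the final horizontal reduction; the select is what lets the reduction result stay correct when the vector loop also covers the tail.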
@@ -9174,18 +9090,40 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxTy = RdxDesc.getRecurrenceType(); - auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc, - Result->getVPSingleValue(), RdxTy); + auto *Trunc = + new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy); auto *Extnd = RdxDesc.isSigned() ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy) : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy); - Trunc->insertAfter(Result); + Trunc->insertAfter(NewExitingVPV->getDefiningRecipe()); Extnd->insertAfter(Trunc); - Result->getVPSingleValue()->replaceAllUsesWith(Extnd); - Trunc->setOperand(0, Result->getVPSingleValue()); + if (PhiR->getOperand(1) == NewExitingVPV) + PhiR->setOperand(1, Extnd->getVPSingleValue()); + NewExitingVPV = Extnd; } + + // We want code in the middle block to appear to execute on the location of + // the scalar loop's latch terminator because: (a) it is all compiler + // generated, (b) these instructions are always executed after evaluating + // the latch conditional branch, and (c) other passes may add new + // predecessors which terminate on this line. This is the easiest way to + // ensure we don't accidentally cause an extra step back into the loop while + // debugging. + DebugLoc ExitDL = OrigLoop->getLoopLatch()->getTerminator()->getDebugLoc(); + + // TODO: At the moment ComputeReductionResult also drives creation of the + // bc.merge.rdx phi nodes, hence it needs to be created unconditionally here + // even for in-loop reductions, until the reduction resume value handling is + // also modeled in VPlan. + auto *FinalReductionResult = new VPInstruction( + VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL); + cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor()) + ->appendRecipe(FinalReductionResult); + OrigExitingVPV->replaceUsesWithIf( + FinalReductionResult, + [](VPUser &User, unsigned) { return isa<VPLiveOut>(&User); }); } VPlanTransforms::clearReductionWrapFlags(*Plan); @@ -10152,8 +10090,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { EPI, &LVL, &CM, BFI, PSI, Checks); VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF); - auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, - BestMainPlan, MainILV, DT, true); + const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan( + EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV, DT, true); ++LoopsVectorized; // Second pass vectorizes the epilogue and adjusts the control flow @@ -10194,8 +10132,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { Value *ResumeV = nullptr; // TODO: Move setting of resume values to prepareToExecute. 
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) { - ResumeV = MainILV.getReductionResumeValue( - ReductionPhi->getRecurrenceDescriptor()); + ResumeV = ReductionResumeValues + .find(&ReductionPhi->getRecurrenceDescriptor()) + ->second; } else { // Create induction resume values for both widened pointer and // integer/fp inductions and update the start value of the induction diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 304991526064..8e22b54f002d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10596,7 +10596,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { inversePermutation(E->ReorderIndices, ReorderMask); if (!ReorderMask.empty()) reorderScalars(GatheredScalars, ReorderMask); - auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF) { + auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF, + unsigned I, unsigned SliceSize) { if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) { return isa<UndefValue>(V) && !isa<PoisonValue>(V); })) @@ -10619,11 +10620,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { Idx == 0) || (Mask.size() == InputVF && ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))) { - std::iota(Mask.begin(), Mask.end(), 0); + std::iota(std::next(Mask.begin(), I * SliceSize), + std::next(Mask.begin(), (I + 1) * SliceSize), 0); } else { - unsigned I = + unsigned IVal = *find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; }); - std::fill(Mask.begin(), Mask.end(), I); + std::fill(std::next(Mask.begin(), I * SliceSize), + std::next(Mask.begin(), (I + 1) * SliceSize), IVal); } return true; }; @@ -10872,7 +10875,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { } else if (Vec1) { IsUsedInExpr &= FindReusedSplat( ExtractMask, - cast<FixedVectorType>(Vec1->getType())->getNumElements()); + cast<FixedVectorType>(Vec1->getType())->getNumElements(), 0, + ExtractMask.size()); ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true); IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1); } else { @@ -10898,7 +10902,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { copy(SubMask, std::next(VecMask.begin(), I * SliceSize)); if (TEs.size() == 1) { IsUsedInExpr &= - FindReusedSplat(VecMask, TEs.front()->getVectorFactor()); + FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize); ShuffleBuilder.add(*TEs.front(), VecMask); if (TEs.front()->VectorizedValue) IsNonPoisoned &= @@ -11139,6 +11143,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { case Instruction::ExtractElement: { Value *V = E->getSingleOperand(0); + if (const TreeEntry *TE = getTreeEntry(V)) + V = TE->VectorizedValue; setInsertPointAfterBundle(E); V = FinalShuffle(V, E, VecTy, IsSigned); E->VectorizedValue = V; @@ -11903,8 +11909,10 @@ Value *BoUpSLP::vectorizeTree( if (!Ex) { // "Reuse" the existing extract to improve final codegen. 
if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) { - Ex = Builder.CreateExtractElement(ES->getOperand(0), - ES->getOperand(1)); + Value *V = ES->getVectorOperand(); + if (const TreeEntry *ETE = getTreeEntry(V)) + V = ETE->VectorizedValue; + Ex = Builder.CreateExtractElement(V, ES->getIndexOperand()); } else { Ex = Builder.CreateExtractElement(Vec, Lane); } diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 7ff6749a0908..4b3143aead46 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -138,8 +138,11 @@ public: /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True or the loop mask when - /// tail folding. It returns the *entry* mask for the block BB. - VPValue *createBlockInMask(BasicBlock *BB, VPlan &Plan); + /// tail folding. + void createBlockInMask(BasicBlock *BB, VPlan &Plan); + + /// Returns the *entry* mask for the block \p BB. + VPValue *getBlockInMask(BasicBlock *BB) const; /// A helper function that computes the predicate of the edge between SRC /// and DST. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 1d7df9c9575a..b6e56c47c227 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -446,6 +446,7 @@ void VPBasicBlock::execute(VPTransformState *State) { // ExitBB can be re-used for the exit block of the Plan. NewBB = State->CFG.ExitBB; State->CFG.PrevBB = NewBB; + State->Builder.SetInsertPoint(NewBB->getFirstNonPHI()); // Update the branch instruction in the predecessor to branch to ExitBB. VPBlockBase *PredVPB = getSingleHierarchicalPredecessor(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 7d33baac52c9..4b4f4911eb64 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -842,6 +842,12 @@ public: WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {} }; +protected: + struct GEPFlagsTy { + char IsInBounds : 1; + GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {} + }; + private: struct DisjointFlagsTy { char IsDisjoint : 1; @@ -849,9 +855,6 @@ private: struct ExactFlagsTy { char IsExact : 1; }; - struct GEPFlagsTy { - char IsInBounds : 1; - }; struct NonNegFlagsTy { char NonNeg : 1; }; @@ -933,12 +936,21 @@ public: : VPRecipeBase(SC, Operands, DL), OpType(OperationType::FPMathOp), FMFs(FMFs) {} +protected: + template <typename IterT> + VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, + GEPFlagsTy GEPFlags, DebugLoc DL = {}) + : VPRecipeBase(SC, Operands, DL), OpType(OperationType::GEPOp), + GEPFlags(GEPFlags) {} + +public: static inline bool classof(const VPRecipeBase *R) { return R->getVPDefID() == VPRecipeBase::VPInstructionSC || R->getVPDefID() == VPRecipeBase::VPWidenSC || R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || R->getVPDefID() == VPRecipeBase::VPWidenCastSC || - R->getVPDefID() == VPRecipeBase::VPReplicateSC; + R->getVPDefID() == VPRecipeBase::VPReplicateSC || + R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; } /// Drop all poison-generating flags. @@ -1061,7 +1073,8 @@ public: // Increment the canonical IV separately for each unrolled part. 
CanonicalIVIncrementForPart, BranchOnCount, - BranchOnCond + BranchOnCond, + ComputeReductionResult, }; private: @@ -1360,15 +1373,16 @@ public: /// A recipe to compute the pointers for widened memory accesses of IndexTy for /// all parts. If IsReverse is true, compute pointers for accessing the input in /// reverse order per part. -class VPVectorPointerRecipe : public VPRecipeBase, public VPValue { +class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPValue { Type *IndexedTy; bool IsReverse; public: VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, - DebugLoc DL) - : VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this), - IndexedTy(IndexedTy), IsReverse(IsReverse) {} + bool IsInBounds, DebugLoc DL) + : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr), + GEPFlagsTy(IsInBounds), DL), + VPValue(this), IndexedTy(IndexedTy), IsReverse(IsReverse) {} VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) @@ -3132,6 +3146,8 @@ inline bool isUniformAfterVectorization(VPValue *VPV) { return Rep->isUniform(); if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def)) return all_of(GEP->operands(), isUniformAfterVectorization); + if (auto *VPI = dyn_cast<VPInstruction>(Def)) + return VPI->getOpcode() == VPInstruction::ComputeReductionResult; return false; } } // end namespace vputils diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 76961629aece..1f844bce2310 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include <cassert> @@ -119,7 +120,9 @@ bool VPRecipeBase::mayHaveSideEffects() const { return false; case VPInstructionSC: switch (cast<VPInstruction>(this)->getOpcode()) { + case Instruction::Or: case Instruction::ICmp: + case Instruction::Select: case VPInstruction::Not: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: @@ -401,6 +404,84 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); return CondBr; } + case VPInstruction::ComputeReductionResult: { + if (Part != 0) + return State.get(this, 0); + + // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary + // and will be removed by breaking up the recipe further. + auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); + auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); + // Get its reduction variable descriptor. + const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + + RecurKind RK = RdxDesc.getRecurrenceKind(); + + State.setDebugLocFrom(getDebugLoc()); + + VPValue *LoopExitingDef = getOperand(1); + Type *PhiTy = OrigPhi->getType(); + VectorParts RdxParts(State.UF); + for (unsigned Part = 0; Part < State.UF; ++Part) + RdxParts[Part] = State.get(LoopExitingDef, Part); + + // If the vector reduction can be performed in a smaller type, we truncate + // then extend the loop exit value to enable InstCombine to evaluate the + // entire expression in the smaller type. + // TODO: Handle this in truncateToMinBW. 
+ if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { + Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF); + for (unsigned Part = 0; Part < State.UF; ++Part) + RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); + } + // Reduce all of the unrolled parts into a single vector. + Value *ReducedPartRdx = RdxParts[0]; + unsigned Op = RecurrenceDescriptor::getOpcode(RK); + + if (PhiR->isOrdered()) { + ReducedPartRdx = RdxParts[State.UF - 1]; + } else { + // Floating-point operations should have some FMF to enable the reduction. + IRBuilderBase::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); + for (unsigned Part = 1; Part < State.UF; ++Part) { + Value *RdxPart = RdxParts[Part]; + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + ReducedPartRdx = Builder.CreateBinOp( + (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); + else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { + TrackingVH<Value> ReductionStartValue = + RdxDesc.getRecurrenceStartValue(); + ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK, + ReducedPartRdx, RdxPart); + } else + ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); + } + } + + // Create the reduction after the loop. Note that inloop reductions create + // the target reduction in the loop using a Reduction recipe. + if (State.VF.isVector() && !PhiR->isInLoop()) { + ReducedPartRdx = + createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); + // If the reduction can be performed in a smaller type, we need to extend + // the reduction to the wider type before we branch to the original loop. + if (PhiTy != RdxDesc.getRecurrenceType()) + ReducedPartRdx = RdxDesc.isSigned() + ? Builder.CreateSExt(ReducedPartRdx, PhiTy) + : Builder.CreateZExt(ReducedPartRdx, PhiTy); + } + + // If there were stores of the reduction value to a uniform memory address + // inside the loop, create the final store here. + if (StoreInst *SI = RdxDesc.IntermediateStore) { + auto *NewSI = Builder.CreateAlignedStore( + ReducedPartRdx, SI->getPointerOperand(), SI->getAlign()); + propagateMetadata(NewSI, SI); + } + + return ReducedPartRdx; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -477,6 +558,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::BranchOnCount: O << "branch-on-count"; break; + case VPInstruction::ComputeReductionResult: + O << "compute-reduction-result"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -1225,9 +1309,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) { ? DL.getIndexType(IndexedTy->getPointerTo()) : Builder.getInt32Ty(); Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); - bool InBounds = false; - if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) - InBounds = GEP->isInBounds(); + bool InBounds = isInBounds(); if (IsReverse) { // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element. 
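To make the middle-block codegen above more concrete, here is a minimal hand-written IR sketch (not output of this commit) of what compute-reduction-result materialises for an interleaved (UF = 2, VF = 4) integer sum reduction, together with the bc.merge.rdx phi built by createAndCollectMergePhiForReduction earlier in this patch. All value names and the VF/UF choice are illustrative assumptions, and %n is assumed to be non-zero.

define i32 @sum(ptr %p, i64 %n) {
entry:
  ; assumes %n > 0 and %p is 4-byte aligned
  %min.iters.check = icmp ult i64 %n, 8
  br i1 %min.iters.check, label %scalar.ph, label %vector.ph

vector.ph:
  %n.vec = and i64 %n, -8
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add0, %vector.body ]
  %vec.phi1 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %add1, %vector.body ]
  %gep0 = getelementptr inbounds i32, ptr %p, i64 %index
  %gep1 = getelementptr inbounds i32, ptr %gep0, i64 4
  %load0 = load <4 x i32>, ptr %gep0, align 4
  %load1 = load <4 x i32>, ptr %gep1, align 4
  %add0 = add <4 x i32> %vec.phi, %load0
  %add1 = add <4 x i32> %vec.phi1, %load1
  %index.next = add i64 %index, 8
  %done = icmp eq i64 %index.next, %n.vec
  br i1 %done, label %middle.block, label %vector.body

middle.block:
  ; combine the unrolled parts, then reduce the final vector to a scalar
  %bin.rdx = add <4 x i32> %add1, %add0
  %rdx = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %bin.rdx)
  %cmp.n = icmp eq i64 %n.vec, %n
  br i1 %cmp.n, label %exit, label %scalar.ph

scalar.ph:
  ; bc.merge.rdx: resume value for the reduction in the scalar remainder loop
  %bc.merge.rdx = phi i32 [ %rdx, %middle.block ], [ 0, %entry ]
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ]
  br label %for.body

for.body:
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %for.body ]
  %sum = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %sum.next, %for.body ]
  %gep = getelementptr inbounds i32, ptr %p, i64 %iv
  %val = load i32, ptr %gep, align 4
  %sum.next = add i32 %sum, %val
  %iv.next = add i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %for.body

exit:
  %result = phi i32 [ %rdx, %middle.block ], [ %sum.next, %for.body ]
  ret i32 %result
}

declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

The bin.rdx chain and the final reduce call correspond to the part-combining loop and the createTargetReduction call in the recipe codegen above, while the bc.merge.rdx phi is the one wired up by createAndCollectMergePhiForReduction from the ComputeReductionResult recipes placed in the middle block.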
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 33132880d5a4..5c430620a2dc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -829,15 +829,20 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { Type *ATy = TypeInfo.inferScalarType(A); if (TruncTy == ATy) { Trunc->replaceAllUsesWith(A); - } else if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) { - auto *VPC = - new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); - VPC->insertBefore(&R); - Trunc->replaceAllUsesWith(VPC); - } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { - auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy); - VPC->insertBefore(&R); - Trunc->replaceAllUsesWith(VPC); + } else { + // Don't replace a scalarizing recipe with a widened cast. + if (isa<VPReplicateRecipe>(&R)) + break; + if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) { + auto *VPC = + new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); + VPC->insertBefore(&R); + Trunc->replaceAllUsesWith(VPC); + } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { + auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy); + VPC->insertBefore(&R); + Trunc->replaceAllUsesWith(VPC); + } } #ifndef NDEBUG // Verify that the cached type info is for both A and its users is still diff --git a/llvm/tools/lli/ExecutionUtils.cpp b/llvm/tools/lli/ExecutionUtils.cpp index 55370ed40f2b..b6cc3bb174d3 100644 --- a/llvm/tools/lli/ExecutionUtils.cpp +++ b/llvm/tools/lli/ExecutionUtils.cpp @@ -8,6 +8,7 @@ #include "ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" @@ -15,34 +16,6 @@ #include <cstdint> #include <vector> -// Declarations follow the GDB JIT interface (version 1, 2009) and must match -// those of the DYLD used for testing. See: -// -// llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp -// llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp -// -typedef enum { - JIT_NOACTION = 0, - JIT_REGISTER_FN, - JIT_UNREGISTER_FN -} jit_actions_t; - -struct jit_code_entry { - struct jit_code_entry *next_entry; - struct jit_code_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; -}; - -struct jit_descriptor { - uint32_t version; - // This should be jit_actions_t, but we want to be specific about the - // bit-width. - uint32_t action_flag; - struct jit_code_entry *relevant_entry; - struct jit_code_entry *first_entry; -}; - namespace llvm { template <typename... Ts> static void outsv(const char *Fmt, Ts &&...Vals) { @@ -61,6 +34,9 @@ static const char *actionFlagToStr(uint32_t ActionFlag) { return "<invalid action_flag>"; } +// Declarations follow the GDB JIT interface (version 1, 2009) and must match +// those of the DYLD used for testing. 
+// // Sample output: // // Reading __jit_debug_descriptor at 0x0000000000404048 diff --git a/llvm/tools/lli/ForwardingMemoryManager.h b/llvm/tools/lli/ForwardingMemoryManager.h index f1de7a153a27..2cc669953ee6 100644 --- a/llvm/tools/lli/ForwardingMemoryManager.h +++ b/llvm/tools/lli/ForwardingMemoryManager.h @@ -105,13 +105,14 @@ public: JITSymbol findSymbol(const std::string &Name) override { orc::RemoteSymbolLookupSet R; R.push_back({std::move(Name), false}); - if (auto Addrs = DylibMgr.lookup(H, R)) { - if (Addrs->size() != 1) + if (auto Syms = DylibMgr.lookup(H, R)) { + if (Syms->size() != 1) return make_error<StringError>("Unexpected remote lookup result", inconvertibleErrorCode()); - return JITSymbol(Addrs->front().getValue(), JITSymbolFlags::Exported); + return JITSymbol(Syms->front().getAddress().getValue(), + Syms->front().getFlags()); } else - return Addrs.takeError(); + return Syms.takeError(); } JITSymbol findSymbolInLogicalDylib(const std::string &Name) override { diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index 5f1fd1578764..8a7ea2d3d0c5 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -965,9 +965,12 @@ int runOrcJIT(const char *ProgName) { EPC = ExitOnErr(orc::SelfExecutorProcessControl::Create( std::make_shared<orc::SymbolStringPool>())); - Builder.setObjectLinkingLayerCreator([&EPC, &P](orc::ExecutionSession &ES, - const Triple &TT) { - auto L = std::make_unique<orc::ObjectLinkingLayer>(ES, EPC->getMemMgr()); + Builder.getJITTargetMachineBuilder() + ->setRelocationModel(Reloc::PIC_) + .setCodeModel(CodeModel::Small); + Builder.setObjectLinkingLayerCreator([&P](orc::ExecutionSession &ES, + const Triple &TT) { + auto L = std::make_unique<orc::ObjectLinkingLayer>(ES); if (P != LLJITPlatform::ExecutorNative) L->addPlugin(std::make_unique<orc::EHFrameRegistrationPlugin>( ES, ExitOnErr(orc::EPCEHFrameRegistrar::Create(ES)))); diff --git a/llvm/tools/llvm-cxxfilt/Opts.td b/llvm/tools/llvm-cxxfilt/Opts.td index f652a1a7f88b..034cb267aab8 100644 --- a/llvm/tools/llvm-cxxfilt/Opts.td +++ b/llvm/tools/llvm-cxxfilt/Opts.td @@ -17,6 +17,7 @@ multiclass Eq<string name, string help> { def help : FF<"help", "Display this help">; defm strip_underscore : BB<"strip-underscore", "Strip the leading underscore", "Don't strip the leading underscore">; def types : FF<"types", "Attempt to demangle types as well as function names">; +def no_params : FF<"no-params", "Skip function parameters and return types">; def version : FF<"version", "Display the version">; defm : Eq<"format", "Specify mangling format. 
Currently ignored because only 'gnu' is supported">; @@ -25,4 +26,5 @@ def : F<"s", "Alias for --format">; def : F<"_", "Alias for --strip-underscore">, Alias<strip_underscore>; def : F<"h", "Alias for --help">, Alias<help>; def : F<"n", "Alias for --no-strip-underscore">, Alias<no_strip_underscore>; +def : F<"p", "Alias for --no-params">, Alias<no_params>; def : F<"t", "Alias for --types">, Alias<types>; diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp index 4b9d88a65066..26a1f2f4afeb 100644 --- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp +++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp @@ -54,6 +54,7 @@ public: }; } // namespace +static bool ParseParams; static bool StripUnderscore; static bool Types; @@ -74,18 +75,19 @@ static std::string demangle(const std::string &Mangled) { } std::string Result; - if (nonMicrosoftDemangle(DecoratedStr, Result, CanHaveLeadingDot)) + if (nonMicrosoftDemangle(DecoratedStr, Result, CanHaveLeadingDot, + ParseParams)) return Result; std::string Prefix; char *Undecorated = nullptr; if (Types) - Undecorated = itaniumDemangle(DecoratedStr); + Undecorated = itaniumDemangle(DecoratedStr, ParseParams); if (!Undecorated && starts_with(DecoratedStr, "__imp_")) { Prefix = "import thunk for "; - Undecorated = itaniumDemangle(DecoratedStr.substr(6)); + Undecorated = itaniumDemangle(DecoratedStr.substr(6), ParseParams); } Result = Undecorated ? Prefix + Undecorated : Mangled; @@ -173,6 +175,8 @@ int llvm_cxxfilt_main(int argc, char **argv, const llvm::ToolContext &) { else StripUnderscore = Triple(sys::getProcessTriple()).isOSBinFormatMachO(); + ParseParams = !Args.hasArg(OPT_no_params); + Types = Args.hasArg(OPT_types); std::vector<std::string> Decorated = Args.getAllArgValues(OPT_INPUT); diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp index 02a94596ec76..d6504992b56e 100644 --- a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp +++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp @@ -9,9 +9,9 @@ #include "DebugInfoLinker.h" #include "Error.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/DWARFLinker/DWARFLinker.h" -#include "llvm/DWARFLinker/DWARFStreamer.h" -#include "llvm/DWARFLinkerParallel/DWARFLinker.h" +#include "llvm/DWARFLinker/Classic/DWARFLinker.h" +#include "llvm/DWARFLinker/Classic/DWARFStreamer.h" +#include "llvm/DWARFLinker/Parallel/DWARFLinker.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/Object/ObjectFile.h" @@ -19,6 +19,8 @@ #include <vector> namespace llvm { +using namespace dwarf_linker; + namespace dwarfutil { // ObjFileAddressMap allows to check whether specified DIE referencing @@ -37,8 +39,7 @@ namespace dwarfutil { // exec: [LowPC, HighPC] is not inside address ranges of .text sections // // universal: maxpc and bfd -template <typename AddressMapBase> -class ObjFileAddressMap : public AddressMapBase { +class ObjFileAddressMap : public AddressesMap { public: ObjFileAddressMap(DWARFContext &Context, const Options &Options, object::ObjectFile &ObjFile) @@ -298,7 +299,7 @@ static std::string getMessageForDeletedAcceleratorTables( return Message; } -template <typename Linker, typename OutDwarfFile, typename AddressMapBase> +template <typename Linker> Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options, raw_pwrite_stream &OutStream) { std::mutex ErrorHandlerMutex; @@ -345,7 +346,7 @@ Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options, 
DebugInfoLinker->setVerbosity(Options.Verbose); DebugInfoLinker->setUpdateIndexTablesOnly(!Options.DoGarbageCollection); - std::vector<std::unique_ptr<OutDwarfFile>> ObjectsForLinking(1); + std::vector<std::unique_ptr<DWARFFile>> ObjectsForLinking(1); // Add object files to the DWARFLinker. std::unique_ptr<DWARFContext> Context = DWARFContext::create( @@ -360,11 +361,10 @@ Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options, ReportWarn(Info.message(), "", nullptr); }); }); - std::unique_ptr<ObjFileAddressMap<AddressMapBase>> AddressesMap( - std::make_unique<ObjFileAddressMap<AddressMapBase>>(*Context, Options, - File)); + std::unique_ptr<ObjFileAddressMap> AddressesMap( + std::make_unique<ObjFileAddressMap>(*Context, Options, File)); - ObjectsForLinking[0] = std::make_unique<OutDwarfFile>( + ObjectsForLinking[0] = std::make_unique<DWARFFile>( File.getFileName(), std::move(Context), std::move(AddressesMap)); uint16_t MaxDWARFVersion = 0; @@ -400,7 +400,7 @@ Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options, for (typename Linker::AccelTableKind Table : AccelTables) DebugInfoLinker->addAccelTableKind(Table); - for (std::unique_ptr<OutDwarfFile> &CurFile : ObjectsForLinking) { + for (std::unique_ptr<DWARFFile> &CurFile : ObjectsForLinking) { SmallVector<StringRef> AccelTableNamesToReplace; SmallVector<StringRef> AccelTableNamesToDelete; @@ -452,13 +452,9 @@ Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options, Error linkDebugInfo(object::ObjectFile &File, const Options &Options, raw_pwrite_stream &OutStream) { if (Options.UseLLVMDWARFLinker) - return linkDebugInfoImpl<dwarflinker_parallel::DWARFLinker, - dwarflinker_parallel::DWARFFile, - dwarflinker_parallel::AddressesMap>(File, Options, - OutStream); + return linkDebugInfoImpl<parallel::DWARFLinker>(File, Options, OutStream); else - return linkDebugInfoImpl<DWARFLinker, DWARFFile, AddressesMap>( - File, Options, OutStream); + return linkDebugInfoImpl<classic::DWARFLinker>(File, Options, OutStream); } } // end of namespace dwarfutil diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 12b81d411cfa..05e96f48cf12 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -998,13 +998,14 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC, auto buildStaticFuncMap = [&StaticFuncMap, SampleProfileHasFUnique](const StringRef Name) { - std::string Prefixes[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"}; + std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"}; size_t PrefixPos = StringRef::npos; - for (auto &Prefix : Prefixes) { - PrefixPos = Name.find_insensitive(Prefix); + for (auto &FilePrefix : FilePrefixes) { + std::string NamePrefix = FilePrefix + kGlobalIdentifierDelimiter; + PrefixPos = Name.find_insensitive(NamePrefix); if (PrefixPos == StringRef::npos) continue; - PrefixPos += Prefix.size(); + PrefixPos += NamePrefix.size(); break; } @@ -1088,17 +1089,17 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC, // // InstrProfile has two entries: // foo - // bar.cc:bar + // bar.cc;bar // // After BuildMaxSampleMap, we should have the following in FlattenSampleMap: // {"foo", {1000, 5000}} - // {"bar.cc:bar", {11000, 30000}} + // {"bar.cc;bar", {11000, 30000}} // // foo's has an entry count of 1000, and max body count of 5000. 
- // bar.cc:bar has an entry count of 11000 (sum two callsites of 1000 and + // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and // 10000), and max count of 30000 (from the callsite in line 8). // - // Note that goo's count will remain in bar.cc:bar() as it does not have an + // Note that goo's count will remain in bar.cc;bar() as it does not have an // entry in InstrProfile. llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap; auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap, @@ -3157,7 +3158,11 @@ static int order_main(int argc, const char *argv[]) { BalancedPartitioning BP(Config); BP.run(Nodes); - WithColor::note() << "# Ordered " << Nodes.size() << " functions\n"; + OS << "# Ordered " << Nodes.size() << " functions\n"; + OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the " + "linkage and this output does not take that into account. Some " + "post-processing may be required before passing to the linker via " + "-order_file.\n"; for (auto &N : Nodes) { auto [Filename, ParsedFuncName] = getParsedIRPGOFuncName(Reader->getSymtab().getFuncOrVarName(N.Id)); diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index abf7ba6ba1c3..f369a63add11 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1084,6 +1084,7 @@ const EnumEntry<unsigned> ElfOSABI[] = { {"AROS", "AROS", ELF::ELFOSABI_AROS}, {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, + {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE} }; diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index b6068513d230..c649e6ecddc0 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -426,7 +426,7 @@ int main(int argc, char **argv) { initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry); initializeCallBrPreparePass(Registry); - initializeCodeGenPreparePass(Registry); + initializeCodeGenPrepareLegacyPassPass(Registry); initializeAtomicExpandPass(Registry); initializeWinEHPreparePass(Registry); initializeDwarfEHPrepareLegacyPassPass(Registry); diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index 89aca87a28ec..348b3b3e0898 100644 --- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -284,7 +284,8 @@ private: /// succeed. PatternType inferNamedOperandType(const InstructionPattern &IP, StringRef OpName, - const TypeEquivalenceClasses &TECs) const; + const TypeEquivalenceClasses &TECs, + bool AllowSelf = false) const; const Record &RuleDef; SmallVector<InstructionPattern *, 8> MatchPats; @@ -427,8 +428,8 @@ PatternType CombineRuleOperandTypeChecker::inferImmediateType( continue; // Named operand with the same name, try to infer that. 
- if (PatternType InferTy = - inferNamedOperandType(IP, Op.getOperandName(), TECs)) + if (PatternType InferTy = inferNamedOperandType(IP, Op.getOperandName(), + TECs, /*AllowSelf=*/true)) return InferTy; } } @@ -438,16 +439,17 @@ PatternType CombineRuleOperandTypeChecker::inferImmediateType( PatternType CombineRuleOperandTypeChecker::inferNamedOperandType( const InstructionPattern &IP, StringRef OpName, - const TypeEquivalenceClasses &TECs) const { + const TypeEquivalenceClasses &TECs, bool AllowSelf) const { // This is the simplest possible case, we just need to find a TEC that - // contains OpName. Look at all other operands in equivalence class and try to - // find a suitable one. + // contains OpName. Look at all operands in equivalence class and try to + // find a suitable one. If `AllowSelf` is true, the operand itself is also + // considered suitable. // Check for a def of a matched pattern. This is guaranteed to always // be a register so we can blindly use that. StringRef GoodOpName; for (auto It = TECs.findLeader(OpName); It != TECs.member_end(); ++It) { - if (*It == OpName) + if (!AllowSelf && *It == OpName) continue; const auto LookupRes = MatchOpTable.lookup(*It); diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp new file mode 100644 index 000000000000..78dcd4471ae7 --- /dev/null +++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -0,0 +1,236 @@ +//===------ MacroFusionPredicatorEmitter.cpp - Generator for Fusion ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// MacroFusionPredicatorEmitter implements a TableGen-driven predicators +// generator for macro-op fusions. +// +// This TableGen backend processes `Fusion` definitions and generates +// predicators for checking if input instructions can be fused. These +// predicators can used in `MacroFusion` DAG mutation. +// +// The generated header file contains two parts: one for predicator +// declarations and one for predicator implementations. The user can get them +// by defining macro `GET_<TargetName>_MACRO_FUSION_PRED_DECL` or +// `GET_<TargetName>_MACRO_FUSION_PRED_IMPL` and then including the generated +// header file. +// +// The generated predicator will be like: +// +// ``` +// bool isNAME(const TargetInstrInfo &TII, +// const TargetSubtargetInfo &STI, +// const MachineInstr *FirstMI, +// const MachineInstr &SecondMI) { +// auto &MRI = SecondMI.getMF()->getRegInfo(); +// /* Predicates */ +// return true; +// } +// ``` +// +// The `Predicates` part is generated from a list of `FusionPredicate`, which +// can be predefined predicates, a raw code string or `MCInstPredicate` defined +// in TargetInstrPredicate.td. 
+// +//===---------------------------------------------------------------------===// + +#include "CodeGenTarget.h" +#include "PredicateExpander.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include <set> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "macro-fusion-predicator" + +namespace { +class MacroFusionPredicatorEmitter { + RecordKeeper &Records; + CodeGenTarget Target; + + void emitMacroFusionDecl(std::vector<Record *> Fusions, PredicateExpander &PE, + raw_ostream &OS); + void emitMacroFusionImpl(std::vector<Record *> Fusions, PredicateExpander &PE, + raw_ostream &OS); + void emitPredicates(std::vector<Record *> &FirstPredicate, + PredicateExpander &PE, raw_ostream &OS); + void emitFirstPredicate(Record *SecondPredicate, PredicateExpander &PE, + raw_ostream &OS); + void emitSecondPredicate(Record *SecondPredicate, PredicateExpander &PE, + raw_ostream &OS); + void emitBothPredicate(Record *Predicates, PredicateExpander &PE, + raw_ostream &OS); + +public: + MacroFusionPredicatorEmitter(RecordKeeper &R) : Records(R), Target(R) {} + + void run(raw_ostream &OS); +}; +} // End anonymous namespace. + +void MacroFusionPredicatorEmitter::emitMacroFusionDecl( + std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) { + OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n"; + OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n"; + OS << "namespace llvm {\n"; + + for (Record *Fusion : Fusions) { + OS << "bool is" << Fusion->getName() << "(const TargetInstrInfo &, " + << "const TargetSubtargetInfo &, " + << "const MachineInstr *, " + << "const MachineInstr &);\n"; + } + + OS << "} // end namespace llvm\n"; + OS << "\n#endif\n"; +} + +void MacroFusionPredicatorEmitter::emitMacroFusionImpl( + std::vector<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) { + OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n"; + OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n"; + OS << "namespace llvm {\n"; + + for (Record *Fusion : Fusions) { + std::vector<Record *> Predicates = + Fusion->getValueAsListOfDefs("Predicates"); + + OS << "bool is" << Fusion->getName() << "(\n"; + OS.indent(4) << "const TargetInstrInfo &TII,\n"; + OS.indent(4) << "const TargetSubtargetInfo &STI,\n"; + OS.indent(4) << "const MachineInstr *FirstMI,\n"; + OS.indent(4) << "const MachineInstr &SecondMI) {\n"; + OS.indent(2) << "auto &MRI = SecondMI.getMF()->getRegInfo();\n"; + + emitPredicates(Predicates, PE, OS); + + OS.indent(2) << "return true;\n"; + OS << "}\n"; + } + + OS << "} // end namespace llvm\n"; + OS << "\n#endif\n"; +} + +void MacroFusionPredicatorEmitter::emitPredicates( + std::vector<Record *> &Predicates, PredicateExpander &PE, raw_ostream &OS) { + for (Record *Predicate : Predicates) { + Record *Target = Predicate->getValueAsDef("Target"); + if (Target->getName() == "first_fusion_target") + emitFirstPredicate(Predicate, PE, OS); + else if (Target->getName() == "second_fusion_target") + emitSecondPredicate(Predicate, PE, OS); + else if (Target->getName() == "both_fusion_target") + emitBothPredicate(Predicate, PE, OS); + else + PrintFatalError(Target->getLoc(), + "Unsupported 'FusionTarget': " + Target->getName()); + } +} + +void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate, + PredicateExpander &PE, + raw_ostream &OS) { + if 
(Predicate->isSubClassOf("WildcardPred")) { + OS.indent(2) << "if (!FirstMI)\n"; + OS.indent(2) << " return " + << (Predicate->getValueAsBit("ReturnValue") ? "true" : "false") + << ";\n"; + } else if (Predicate->isSubClassOf("OneUsePred")) { + OS.indent(2) << "{\n"; + OS.indent(4) << "Register FirstDest = FirstMI->getOperand(0).getReg();\n"; + OS.indent(4) + << "if (FirstDest.isVirtual() && !MRI.hasOneNonDBGUse(FirstDest))\n"; + OS.indent(4) << " return false;\n"; + OS.indent(2) << "}\n"; + } else if (Predicate->isSubClassOf( + "FirstFusionPredicateWithMCInstPredicate")) { + OS.indent(2) << "{\n"; + OS.indent(4) << "const MachineInstr *MI = FirstMI;\n"; + OS.indent(4) << "if ("; + PE.setNegatePredicate(true); + PE.setIndentLevel(3); + PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); + OS << ")\n"; + OS.indent(4) << " return false;\n"; + OS.indent(2) << "}\n"; + } else { + PrintFatalError(Predicate->getLoc(), + "Unsupported predicate for first instruction: " + + Predicate->getType()->getAsString()); + } +} + +void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate, + PredicateExpander &PE, + raw_ostream &OS) { + if (Predicate->isSubClassOf("SecondFusionPredicateWithMCInstPredicate")) { + OS.indent(2) << "{\n"; + OS.indent(4) << "const MachineInstr *MI = &SecondMI;\n"; + OS.indent(4) << "if ("; + PE.setNegatePredicate(true); + PE.setIndentLevel(3); + PE.expandPredicate(OS, Predicate->getValueAsDef("Predicate")); + OS << ")\n"; + OS.indent(4) << " return false;\n"; + OS.indent(2) << "}\n"; + } else { + PrintFatalError(Predicate->getLoc(), + "Unsupported predicate for first instruction: " + + Predicate->getType()->getAsString()); + } +} + +void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate, + PredicateExpander &PE, + raw_ostream &OS) { + if (Predicate->isSubClassOf("FusionPredicateWithCode")) + OS << Predicate->getValueAsString("Predicate"); + else if (Predicate->isSubClassOf("BothFusionPredicateWithMCInstPredicate")) { + Record *MCPred = Predicate->getValueAsDef("Predicate"); + emitFirstPredicate(MCPred, PE, OS); + emitSecondPredicate(MCPred, PE, OS); + } else if (Predicate->isSubClassOf("TieReg")) { + int FirstOpIdx = Predicate->getValueAsInt("FirstOpIdx"); + int SecondOpIdx = Predicate->getValueAsInt("SecondOpIdx"); + OS.indent(2) << "if (!(FirstMI->getOperand(" << FirstOpIdx + << ").isReg() &&\n"; + OS.indent(2) << " SecondMI.getOperand(" << SecondOpIdx + << ").isReg() &&\n"; + OS.indent(2) << " FirstMI->getOperand(" << FirstOpIdx + << ").getReg() == SecondMI.getOperand(" << SecondOpIdx + << ").getReg()))\n"; + OS.indent(2) << " return false;\n"; + } else + PrintFatalError(Predicate->getLoc(), + "Unsupported predicate for both instruction: " + + Predicate->getType()->getAsString()); +} + +void MacroFusionPredicatorEmitter::run(raw_ostream &OS) { + // Emit file header. + emitSourceFileHeader("Macro Fusion Predicators", OS); + + PredicateExpander PE(Target.getName()); + PE.setByRef(false); + PE.setExpandForMC(false); + + std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion"); + // Sort macro fusions by name. 
+ sort(Fusions, LessRecord()); + emitMacroFusionDecl(Fusions, PE, OS); + OS << "\n"; + emitMacroFusionImpl(Fusions, PE, OS); +} + +static TableGen::Emitter::OptClass<MacroFusionPredicatorEmitter> + X("gen-macro-fusion-pred", "Generate macro fusion predicators."); diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp index 8f96d3307ded..d3a73e02cd91 100644 --- a/llvm/utils/TableGen/PredicateExpander.cpp +++ b/llvm/utils/TableGen/PredicateExpander.cpp @@ -194,6 +194,11 @@ void PredicateExpander::expandCheckIsRegOperand(raw_ostream &OS, int OpIndex) { << "getOperand(" << OpIndex << ").isReg() "; } +void PredicateExpander::expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex) { + OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->") + << "getOperand(" << OpIndex << ").getReg().isVirtual()"; +} + void PredicateExpander::expandCheckIsImmOperand(raw_ostream &OS, int OpIndex) { OS << (shouldNegate() ? "!" : "") << "MI" << (isByRef() ? "." : "->") << "getOperand(" << OpIndex << ").isImm() "; @@ -319,6 +324,9 @@ void PredicateExpander::expandPredicate(raw_ostream &OS, const Record *Rec) { if (Rec->isSubClassOf("CheckIsRegOperand")) return expandCheckIsRegOperand(OS, Rec->getValueAsInt("OpIndex")); + if (Rec->isSubClassOf("CheckIsVRegOperand")) + return expandCheckIsVRegOperand(OS, Rec->getValueAsInt("OpIndex")); + if (Rec->isSubClassOf("CheckIsImmOperand")) return expandCheckIsImmOperand(OS, Rec->getValueAsInt("OpIndex")); diff --git a/llvm/utils/TableGen/PredicateExpander.h b/llvm/utils/TableGen/PredicateExpander.h index 27f049a715aa..cfb0a3d51e67 100644 --- a/llvm/utils/TableGen/PredicateExpander.h +++ b/llvm/utils/TableGen/PredicateExpander.h @@ -75,6 +75,7 @@ public: bool IsCheckAll); void expandTIIFunctionCall(raw_ostream &OS, StringRef MethodName); void expandCheckIsRegOperand(raw_ostream &OS, int OpIndex); + void expandCheckIsVRegOperand(raw_ostream &OS, int OpIndex); void expandCheckIsImmOperand(raw_ostream &OS, int OpIndex); void expandCheckInvalidRegOperand(raw_ostream &OS, int OpIndex); void expandCheckFunctionPredicate(raw_ostream &OS, StringRef MCInstFn, diff --git a/llvm/utils/TableGen/X86CompressEVEXTablesEmitter.cpp b/llvm/utils/TableGen/X86CompressEVEXTablesEmitter.cpp new file mode 100644 index 000000000000..aa8527e75380 --- /dev/null +++ b/llvm/utils/TableGen/X86CompressEVEXTablesEmitter.cpp @@ -0,0 +1,206 @@ +//==- utils/TableGen/X86CompressEVEXTablesEmitter.cpp - X86 backend-*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This tablegen backend is responsible for emitting the X86 backend EVEX +/// compression tables. 
+/// +//===----------------------------------------------------------------------===// + +#include "CodeGenInstruction.h" +#include "CodeGenTarget.h" +#include "X86RecognizableInstr.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include <map> +#include <set> + +using namespace llvm; +using namespace X86Disassembler; + +namespace { + +const std::map<StringRef, StringRef> ManualMap = { +#define ENTRY(OLD, NEW) {#OLD, #NEW}, +#include "X86ManualCompressEVEXTables.def" +}; +const std::set<StringRef> NoCompressSet = { +#define NOCOMP(INSN) #INSN, +#include "X86ManualCompressEVEXTables.def" +}; + +class X86CompressEVEXTablesEmitter { + RecordKeeper &Records; + CodeGenTarget Target; + + // Hold all pontentially compressible EVEX instructions + std::vector<const CodeGenInstruction *> PreCompressionInsts; + // Hold all compressed instructions. Divided into groups with same opcodes + // to make the search more efficient + std::map<uint64_t, std::vector<const CodeGenInstruction *>> CompressedInsts; + + typedef std::pair<const CodeGenInstruction *, const CodeGenInstruction *> + Entry; + + std::vector<Entry> Table; + +public: + X86CompressEVEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} + + // run - Output X86 EVEX compression tables. + void run(raw_ostream &OS); + +private: + // Prints the given table as a C++ array of type X86CompressEVEXTableEntry + void printTable(const std::vector<Entry> &Table, raw_ostream &OS); +}; + +void X86CompressEVEXTablesEmitter::printTable(const std::vector<Entry> &Table, + raw_ostream &OS) { + + OS << "static const X86CompressEVEXTableEntry X86CompressEVEXTable[] = { \n"; + + // Print all entries added to the table + for (const auto &Pair : Table) + OS << " { X86::" << Pair.first->TheDef->getName() + << ", X86::" << Pair.second->TheDef->getName() << " },\n"; + + OS << "};\n\n"; +} + +static uint8_t byteFromBitsInit(const BitsInit *B) { + unsigned N = B->getNumBits(); + assert(N <= 8 && "Field is too large for uint8_t!"); + + uint8_t Value = 0; + for (unsigned I = 0; I != N; ++I) { + BitInit *Bit = cast<BitInit>(B->getBit(I)); + Value |= Bit->getValue() << I; + } + return Value; +} + +class IsMatch { + const CodeGenInstruction *OldInst; + +public: + IsMatch(const CodeGenInstruction *OldInst) : OldInst(OldInst) {} + + bool operator()(const CodeGenInstruction *NewInst) { + RecognizableInstrBase NewRI(*NewInst); + RecognizableInstrBase OldRI(*OldInst); + + // Return false if any of the following fields of does not match. 
+ if (std::make_tuple(OldRI.IsCodeGenOnly, OldRI.OpMap, NewRI.OpPrefix, + OldRI.HasVEX_4V, OldRI.HasVEX_L, OldRI.HasREX_W, + OldRI.Form) != + std::make_tuple(NewRI.IsCodeGenOnly, NewRI.OpMap, OldRI.OpPrefix, + NewRI.HasVEX_4V, NewRI.HasVEX_L, NewRI.HasREX_W, + NewRI.Form)) + return false; + + for (unsigned I = 0, E = OldInst->Operands.size(); I < E; ++I) { + Record *OldOpRec = OldInst->Operands[I].Rec; + Record *NewOpRec = NewInst->Operands[I].Rec; + + if (OldOpRec == NewOpRec) + continue; + + if (isRegisterOperand(OldOpRec) && isRegisterOperand(NewOpRec)) { + if (getRegOperandSize(OldOpRec) != getRegOperandSize(NewOpRec)) + return false; + } else if (isMemoryOperand(OldOpRec) && isMemoryOperand(NewOpRec)) { + if (getMemOperandSize(OldOpRec) != getMemOperandSize(NewOpRec)) + return false; + } else if (isImmediateOperand(OldOpRec) && isImmediateOperand(NewOpRec)) { + if (OldOpRec->getValueAsDef("Type") != NewOpRec->getValueAsDef("Type")) + return false; + } + } + + return true; + } +}; + +void X86CompressEVEXTablesEmitter::run(raw_ostream &OS) { + emitSourceFileHeader("X86 EVEX compression tables", OS); + + ArrayRef<const CodeGenInstruction *> NumberedInstructions = + Target.getInstructionsByEnumValue(); + + for (const CodeGenInstruction *Inst : NumberedInstructions) { + const Record *Rec = Inst->TheDef; + StringRef Name = Rec->getName(); + // _REV instruction should not appear before encoding optimization + if (!Rec->isSubClassOf("X86Inst") || + Rec->getValueAsBit("isAsmParserOnly") || Name.ends_with("_REV")) + continue; + + // Promoted legacy instruction is in EVEX space, and has REX2-encoding + // alternative. It's added due to HW design and never emitted by compiler. + if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) == + X86Local::T_MAP4 && + byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) == + X86Local::ExplicitEVEX) + continue; + + if (NoCompressSet.find(Name) != NoCompressSet.end()) + continue; + + RecognizableInstrBase RI(*Inst); + + bool IsND = RI.OpMap == X86Local::T_MAP4 && RI.HasEVEX_B && RI.HasVEX_4V; + // Add VEX encoded instructions to one of CompressedInsts vectors according + // to it's opcode. + if (RI.Encoding == X86Local::VEX) + CompressedInsts[RI.Opcode].push_back(Inst); + // Add relevant EVEX encoded instructions to PreCompressionInsts + else if (RI.Encoding == X86Local::EVEX && !RI.HasEVEX_K && !RI.HasEVEX_L2 && + (!RI.HasEVEX_B || IsND)) + PreCompressionInsts.push_back(Inst); + } + + for (const CodeGenInstruction *Inst : PreCompressionInsts) { + const Record *Rec = Inst->TheDef; + uint8_t Opcode = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode")); + StringRef Name = Rec->getName(); + const CodeGenInstruction *NewInst = nullptr; + if (ManualMap.find(Name) != ManualMap.end()) { + Record *NewRec = Records.getDef(ManualMap.at(Rec->getName())); + assert(NewRec && "Instruction not found!"); + NewInst = &Target.getInstruction(NewRec); + } else if (Name.ends_with("_EVEX")) { + if (auto *NewRec = Records.getDef(Name.drop_back(5))) + NewInst = &Target.getInstruction(NewRec); + } else if (Name.ends_with("_ND")) { + if (auto *NewRec = Records.getDef(Name.drop_back(3))) { + auto &TempInst = Target.getInstruction(NewRec); + if (isRegisterOperand(TempInst.Operands[0].Rec)) + NewInst = &TempInst; + } + } else { + // For each pre-compression instruction look for a match in the appropriate + // vector (instructions with the same opcode) using function object + // IsMatch. 
+ auto Match = llvm::find_if(CompressedInsts[Opcode], IsMatch(Inst)); + if (Match != CompressedInsts[Opcode].end()) + NewInst = *Match; + } + + if (!NewInst) + continue; + + Table.push_back(std::make_pair(Inst, NewInst)); + } + + printTable(Table, OS); +} +} // namespace + +static TableGen::Emitter::OptClass<X86CompressEVEXTablesEmitter> + X("gen-x86-compress-evex-tables", "Generate X86 EVEX compression tables"); diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp deleted file mode 100644 index c80d9a199fa3..000000000000 --- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp +++ /dev/null @@ -1,210 +0,0 @@ -//===- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp - X86 backend-*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// This tablegen backend is responsible for emitting the X86 backend EVEX2VEX -/// compression tables. -/// -//===----------------------------------------------------------------------===// - -#include "CodeGenInstruction.h" -#include "CodeGenTarget.h" -#include "X86RecognizableInstr.h" -#include "llvm/TableGen/Error.h" -#include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" - -using namespace llvm; -using namespace X86Disassembler; - -namespace { - -class X86EVEX2VEXTablesEmitter { - RecordKeeper &Records; - CodeGenTarget Target; - - // Hold all non-masked & non-broadcasted EVEX encoded instructions - std::vector<const CodeGenInstruction *> EVEXInsts; - // Hold all VEX encoded instructions. Divided into groups with same opcodes - // to make the search more efficient - std::map<uint64_t, std::vector<const CodeGenInstruction *>> VEXInsts; - - typedef std::pair<const CodeGenInstruction *, const CodeGenInstruction *> - Entry; - - // Represent both compress tables - std::vector<Entry> EVEX2VEX128; - std::vector<Entry> EVEX2VEX256; - -public: - X86EVEX2VEXTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} - - // run - Output X86 EVEX2VEX tables. - void run(raw_ostream &OS); - -private: - // Prints the given table as a C++ array of type - // X86EvexToVexCompressTableEntry - void printTable(const std::vector<Entry> &Table, raw_ostream &OS); -}; - -void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table, - raw_ostream &OS) { - StringRef Size = (Table == EVEX2VEX128) ? 
"128" : "256"; - - OS << "// X86 EVEX encoded instructions that have a VEX " << Size - << " encoding\n" - << "// (table format: <EVEX opcode, VEX-" << Size << " opcode>).\n" - << "static const X86EvexToVexCompressTableEntry X86EvexToVex" << Size - << "CompressTable[] = {\n" - << " // EVEX scalar with corresponding VEX.\n"; - - // Print all entries added to the table - for (const auto &Pair : Table) { - OS << " { X86::" << Pair.first->TheDef->getName() - << ", X86::" << Pair.second->TheDef->getName() << " },\n"; - } - - OS << "};\n\n"; -} - -// Return true if the 2 BitsInits are equal -// Calculates the integer value residing BitsInit object -static inline uint64_t getValueFromBitsInit(const BitsInit *B) { - uint64_t Value = 0; - for (unsigned i = 0, e = B->getNumBits(); i != e; ++i) { - if (BitInit *Bit = dyn_cast<BitInit>(B->getBit(i))) - Value |= uint64_t(Bit->getValue()) << i; - else - PrintFatalError("Invalid VectSize bit"); - } - return Value; -} - -// Function object - Operator() returns true if the given VEX instruction -// matches the EVEX instruction of this object. -class IsMatch { - const CodeGenInstruction *EVEXInst; - -public: - IsMatch(const CodeGenInstruction *EVEXInst) : EVEXInst(EVEXInst) {} - - bool operator()(const CodeGenInstruction *VEXInst) { - RecognizableInstrBase VEXRI(*VEXInst); - RecognizableInstrBase EVEXRI(*EVEXInst); - bool VEX_W = VEXRI.HasREX_W; - bool EVEX_W = EVEXRI.HasREX_W; - bool VEX_WIG = VEXRI.IgnoresW; - bool EVEX_WIG = EVEXRI.IgnoresW; - bool EVEX_W1_VEX_W0 = EVEXInst->TheDef->getValueAsBit("EVEX_W1_VEX_W0"); - - if (VEXRI.IsCodeGenOnly != EVEXRI.IsCodeGenOnly || - // VEX/EVEX fields - VEXRI.OpPrefix != EVEXRI.OpPrefix || VEXRI.OpMap != EVEXRI.OpMap || - VEXRI.HasVEX_4V != EVEXRI.HasVEX_4V || - VEXRI.HasVEX_L != EVEXRI.HasVEX_L || - // Match is allowed if either is VEX_WIG, or they match, or EVEX - // is VEX_W1X and VEX is VEX_W0. - (!(VEX_WIG || (!EVEX_WIG && EVEX_W == VEX_W) || - (EVEX_W1_VEX_W0 && EVEX_W && !VEX_W))) || - // Instruction's format - VEXRI.Form != EVEXRI.Form) - return false; - - // This is needed for instructions with intrinsic version (_Int). - // Where the only difference is the size of the operands. - // For example: VUCOMISDZrm and Int_VUCOMISDrm - // Also for instructions that their EVEX version was upgraded to work with - // k-registers. For example VPCMPEQBrm (xmm output register) and - // VPCMPEQBZ128rm (k register output register). - for (unsigned i = 0, e = EVEXInst->Operands.size(); i < e; i++) { - Record *OpRec1 = EVEXInst->Operands[i].Rec; - Record *OpRec2 = VEXInst->Operands[i].Rec; - - if (OpRec1 == OpRec2) - continue; - - if (isRegisterOperand(OpRec1) && isRegisterOperand(OpRec2)) { - if (getRegOperandSize(OpRec1) != getRegOperandSize(OpRec2)) - return false; - } else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) { - return false; - } else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) { - if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) { - return false; - } - } else - return false; - } - - return true; - } -}; - -void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) { - emitSourceFileHeader("X86 EVEX2VEX tables", OS); - - ArrayRef<const CodeGenInstruction *> NumberedInstructions = - Target.getInstructionsByEnumValue(); - - for (const CodeGenInstruction *Inst : NumberedInstructions) { - const Record *Def = Inst->TheDef; - // Filter non-X86 instructions. 
- if (!Def->isSubClassOf("X86Inst")) - continue; - // _REV instruction should not appear before encoding optimization - if (Def->getName().ends_with("_REV")) - continue; - RecognizableInstrBase RI(*Inst); - - // Add VEX encoded instructions to one of VEXInsts vectors according to - // it's opcode. - if (RI.Encoding == X86Local::VEX) - VEXInsts[RI.Opcode].push_back(Inst); - // Add relevant EVEX encoded instructions to EVEXInsts - else if (RI.Encoding == X86Local::EVEX && !RI.HasEVEX_K && !RI.HasEVEX_B && - !RI.HasEVEX_L2 && !Def->getValueAsBit("notEVEX2VEXConvertible")) - EVEXInsts.push_back(Inst); - } - - for (const CodeGenInstruction *EVEXInst : EVEXInsts) { - uint64_t Opcode = getValueFromBitsInit(EVEXInst->TheDef-> - getValueAsBitsInit("Opcode")); - // For each EVEX instruction look for a VEX match in the appropriate vector - // (instructions with the same opcode) using function object IsMatch. - // Allow EVEX2VEXOverride to explicitly specify a match. - const CodeGenInstruction *VEXInst = nullptr; - if (!EVEXInst->TheDef->isValueUnset("EVEX2VEXOverride")) { - StringRef AltInstStr = - EVEXInst->TheDef->getValueAsString("EVEX2VEXOverride"); - Record *AltInstRec = Records.getDef(AltInstStr); - assert(AltInstRec && "EVEX2VEXOverride instruction not found!"); - VEXInst = &Target.getInstruction(AltInstRec); - } else { - auto Match = llvm::find_if(VEXInsts[Opcode], IsMatch(EVEXInst)); - if (Match != VEXInsts[Opcode].end()) - VEXInst = *Match; - } - - if (!VEXInst) - continue; - - // In case a match is found add new entry to the appropriate table - if (EVEXInst->TheDef->getValueAsBit("hasVEX_L")) - EVEX2VEX256.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,1} - else - EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0} - } - - // Print both tables - printTable(EVEX2VEX128, OS); - printTable(EVEX2VEX256, OS); -} -} // namespace - -static TableGen::Emitter::OptClass<X86EVEX2VEXTablesEmitter> - X("gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables"); diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 101b75e2f087..8a860d0945bb 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -374,8 +374,7 @@ public: RegRI.HasEVEX_L2, RegRI.HasEVEX_NF, RegRec->getValueAsBit("hasEVEX_RC"), RegRec->getValueAsBit("hasLockPrefix"), - RegRec->getValueAsBit("hasNoTrackPrefix"), - RegRec->getValueAsBit("EVEX_W1_VEX_W0")) != + RegRec->getValueAsBit("hasNoTrackPrefix")) != std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L, @@ -383,8 +382,7 @@ public: MemRI.HasEVEX_L2, MemRI.HasEVEX_NF, MemRec->getValueAsBit("hasEVEX_RC"), MemRec->getValueAsBit("hasLockPrefix"), - MemRec->getValueAsBit("hasNoTrackPrefix"), - MemRec->getValueAsBit("EVEX_W1_VEX_W0"))) + MemRec->getValueAsBit("hasNoTrackPrefix"))) return false; // Make sure the sizes of the operands of both instructions suit each other. diff --git a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def new file mode 100644 index 000000000000..58ca10e9e10f --- /dev/null +++ b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def @@ -0,0 +1,331 @@ +//===- X86ManualCompressEVEXTables.def ---------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// \file +// This file defines all the entries in X86 EVEX compression tables that need +// special handling. +//===----------------------------------------------------------------------===// + +#ifndef NOCOMP +#define NOCOMP(INSN) +#endif +NOCOMP(VCVTQQ2PDZ128rr) +NOCOMP(VCVTQQ2PSZ128rm) +NOCOMP(VCVTQQ2PSZ128rr) +NOCOMP(VDBPSADBWZ128rmi) +NOCOMP(VDBPSADBWZ128rri) +NOCOMP(VPMAXSQZ128rm) +NOCOMP(VPMAXSQZ128rr) +NOCOMP(VPMAXUQZ128rm) +NOCOMP(VPMAXUQZ128rr) +NOCOMP(VPMINSQZ128rm) +NOCOMP(VPMINSQZ128rr) +NOCOMP(VPMINUQZ128rm) +NOCOMP(VPMINUQZ128rr) +NOCOMP(VPMULLQZ128rm) +NOCOMP(VPMULLQZ128rr) +NOCOMP(VPSRAQZ128ri) +NOCOMP(VPSRAQZ128rm) +NOCOMP(VPSRAQZ128rr) +NOCOMP(VSCALEFPSZ128rm) +NOCOMP(VDBPSADBWZ256rmi) +NOCOMP(VDBPSADBWZ256rri) +NOCOMP(VPMAXSQZ256rm) +NOCOMP(VPMAXSQZ256rr) +NOCOMP(VPMAXUQZ256rm) +NOCOMP(VPMAXUQZ256rr) +NOCOMP(VPMINSQZ256rm) +NOCOMP(VPMINSQZ256rr) +NOCOMP(VPMINUQZ256rm) +NOCOMP(VPMINUQZ256rr) +NOCOMP(VPMULLQZ256rm) +NOCOMP(VPMULLQZ256rr) +NOCOMP(VPSRAQZ256ri) +NOCOMP(VPSRAQZ256rm) +NOCOMP(VPSRAQZ256rr) +NOCOMP(VSCALEFPSZ256rm) +#undef NOCOMP + +#ifndef ENTRY +#define ENTRY(OLD, NEW) +#endif +ENTRY(VALIGNDZ128rmi, VPALIGNRrmi) +ENTRY(VALIGNDZ128rri, VPALIGNRrri) +ENTRY(VALIGNQZ128rmi, VPALIGNRrmi) +ENTRY(VALIGNQZ128rri, VPALIGNRrri) +ENTRY(VMAXSDZrm, VMAXSDrm) +ENTRY(VMAXSDZrr, VMAXSDrr) +ENTRY(VMAXSSZrm, VMAXSSrm) +ENTRY(VMAXSSZrr, VMAXSSrr) +ENTRY(VMINSDZrm, VMINSDrm) +ENTRY(VMINSDZrr, VMINSDrr) +ENTRY(VMINSSZrm, VMINSSrm) +ENTRY(VMINSSZrr, VMINSSrr) +ENTRY(VMOVDQU16Z128mr, VMOVDQUmr) +ENTRY(VMOVDQU16Z128rm, VMOVDQUrm) +ENTRY(VMOVDQU16Z128rr, VMOVDQUrr) +ENTRY(VMOVDQU8Z128mr, VMOVDQUmr) +ENTRY(VMOVDQU8Z128rm, VMOVDQUrm) +ENTRY(VMOVDQU8Z128rr, VMOVDQUrr) +ENTRY(VMOVDQU16Z256mr, VMOVDQUYmr) +ENTRY(VMOVDQU16Z256rm, VMOVDQUYrm) +ENTRY(VMOVDQU16Z256rr, VMOVDQUYrr) +ENTRY(VMOVDQU8Z256mr, VMOVDQUYmr) +ENTRY(VMOVDQU8Z256rm, VMOVDQUYrm) +ENTRY(VMOVDQU8Z256rr, VMOVDQUYrr) +ENTRY(VSHUFF32X4Z256rmi, VPERM2F128rm) +ENTRY(VSHUFF32X4Z256rri, VPERM2F128rr) +ENTRY(VSHUFF64X2Z256rmi, VPERM2F128rm) +ENTRY(VSHUFF64X2Z256rri, VPERM2F128rr) +ENTRY(VSHUFI32X4Z256rmi, VPERM2I128rm) +ENTRY(VSHUFI32X4Z256rri, VPERM2I128rr) +ENTRY(VSHUFI64X2Z256rmi, VPERM2I128rm) +ENTRY(VSHUFI64X2Z256rri, VPERM2I128rr) +// W bit does not match +ENTRY(VADDPDZ128rm, VADDPDrm) +ENTRY(VADDPDZ128rr, VADDPDrr) +ENTRY(VADDSDZrm, VADDSDrm) +ENTRY(VADDSDZrm_Int, VADDSDrm_Int) +ENTRY(VADDSDZrr, VADDSDrr) +ENTRY(VADDSDZrr_Int, VADDSDrr_Int) +ENTRY(VANDNPDZ128rm, VANDNPDrm) +ENTRY(VANDNPDZ128rr, VANDNPDrr) +ENTRY(VANDPDZ128rm, VANDPDrm) +ENTRY(VANDPDZ128rr, VANDPDrr) +ENTRY(VCOMISDZrm, VCOMISDrm) +ENTRY(VCOMISDZrm_Int, VCOMISDrm_Int) +ENTRY(VCOMISDZrr, VCOMISDrr) +ENTRY(VCOMISDZrr_Int, VCOMISDrr_Int) +ENTRY(VCVTPD2DQZ128rm, VCVTPD2DQrm) +ENTRY(VCVTPD2DQZ128rr, VCVTPD2DQrr) +ENTRY(VCVTPD2PSZ128rm, VCVTPD2PSrm) +ENTRY(VCVTPD2PSZ128rr, VCVTPD2PSrr) +ENTRY(VCVTSD2SSZrm, VCVTSD2SSrm) +ENTRY(VCVTSD2SSZrm_Int, VCVTSD2SSrm_Int) +ENTRY(VCVTSD2SSZrr, VCVTSD2SSrr) +ENTRY(VCVTSD2SSZrr_Int, VCVTSD2SSrr_Int) +ENTRY(VCVTTPD2DQZ128rm, VCVTTPD2DQrm) +ENTRY(VCVTTPD2DQZ128rr, VCVTTPD2DQrr) +ENTRY(VDIVPDZ128rm, VDIVPDrm) +ENTRY(VDIVPDZ128rr, VDIVPDrr) +ENTRY(VDIVSDZrm, VDIVSDrm) +ENTRY(VDIVSDZrm_Int, VDIVSDrm_Int) +ENTRY(VDIVSDZrr, VDIVSDrr) +ENTRY(VDIVSDZrr_Int, VDIVSDrr_Int) +ENTRY(VMAXCPDZ128rm, VMAXCPDrm) +ENTRY(VMAXCPDZ128rr, 
VMAXCPDrr) +ENTRY(VMAXCSDZrm, VMAXCSDrm) +ENTRY(VMAXCSDZrr, VMAXCSDrr) +ENTRY(VMAXPDZ128rm, VMAXPDrm) +ENTRY(VMAXPDZ128rr, VMAXPDrr) +ENTRY(VMAXSDZrm_Int, VMAXSDrm_Int) +ENTRY(VMAXSDZrr_Int, VMAXSDrr_Int) +ENTRY(VMINCPDZ128rm, VMINCPDrm) +ENTRY(VMINCPDZ128rr, VMINCPDrr) +ENTRY(VMINCSDZrm, VMINCSDrm) +ENTRY(VMINCSDZrr, VMINCSDrr) +ENTRY(VMINPDZ128rm, VMINPDrm) +ENTRY(VMINPDZ128rr, VMINPDrr) +ENTRY(VMINSDZrm_Int, VMINSDrm_Int) +ENTRY(VMINSDZrr_Int, VMINSDrr_Int) +ENTRY(VMOVAPDZ128mr, VMOVAPDmr) +ENTRY(VMOVAPDZ128rm, VMOVAPDrm) +ENTRY(VMOVAPDZ128rr, VMOVAPDrr) +ENTRY(VMOVDDUPZ128rm, VMOVDDUPrm) +ENTRY(VMOVDDUPZ128rr, VMOVDDUPrr) +ENTRY(VMOVDQA64Z128mr, VMOVDQAmr) +ENTRY(VMOVDQA64Z128rm, VMOVDQArm) +ENTRY(VMOVDQA64Z128rr, VMOVDQArr) +ENTRY(VMOVDQU64Z128mr, VMOVDQUmr) +ENTRY(VMOVDQU64Z128rm, VMOVDQUrm) +ENTRY(VMOVDQU64Z128rr, VMOVDQUrr) +ENTRY(VMOVHPDZ128mr, VMOVHPDmr) +ENTRY(VMOVHPDZ128rm, VMOVHPDrm) +ENTRY(VMOVLPDZ128mr, VMOVLPDmr) +ENTRY(VMOVLPDZ128rm, VMOVLPDrm) +ENTRY(VMOVNTPDZ128mr, VMOVNTPDmr) +ENTRY(VMOVPQI2QIZmr, VMOVPQI2QImr) +ENTRY(VMOVPQI2QIZrr, VMOVPQI2QIrr) +ENTRY(VMOVQI2PQIZrm, VMOVQI2PQIrm) +ENTRY(VMOVSDZmr, VMOVSDmr) +ENTRY(VMOVSDZrm, VMOVSDrm) +ENTRY(VMOVSDZrm_alt, VMOVSDrm_alt) +ENTRY(VMOVSDZrr, VMOVSDrr) +ENTRY(VMOVUPDZ128mr, VMOVUPDmr) +ENTRY(VMOVUPDZ128rm, VMOVUPDrm) +ENTRY(VMOVUPDZ128rr, VMOVUPDrr) +ENTRY(VMOVZPQILo2PQIZrr, VMOVZPQILo2PQIrr) +ENTRY(VMULPDZ128rm, VMULPDrm) +ENTRY(VMULPDZ128rr, VMULPDrr) +ENTRY(VMULSDZrm, VMULSDrm) +ENTRY(VMULSDZrm_Int, VMULSDrm_Int) +ENTRY(VMULSDZrr, VMULSDrr) +ENTRY(VMULSDZrr_Int, VMULSDrr_Int) +ENTRY(VORPDZ128rm, VORPDrm) +ENTRY(VORPDZ128rr, VORPDrr) +ENTRY(VPADDQZ128rm, VPADDQrm) +ENTRY(VPADDQZ128rr, VPADDQrr) +ENTRY(VPANDNQZ128rm, VPANDNrm) +ENTRY(VPANDNQZ128rr, VPANDNrr) +ENTRY(VPANDQZ128rm, VPANDrm) +ENTRY(VPANDQZ128rr, VPANDrr) +ENTRY(VPERMILPDZ128mi, VPERMILPDmi) +ENTRY(VPERMILPDZ128ri, VPERMILPDri) +ENTRY(VPERMILPDZ128rm, VPERMILPDrm) +ENTRY(VPERMILPDZ128rr, VPERMILPDrr) +ENTRY(VPMULDQZ128rm, VPMULDQrm) +ENTRY(VPMULDQZ128rr, VPMULDQrr) +ENTRY(VPMULUDQZ128rm, VPMULUDQrm) +ENTRY(VPMULUDQZ128rr, VPMULUDQrr) +ENTRY(VPORQZ128rm, VPORrm) +ENTRY(VPORQZ128rr, VPORrr) +ENTRY(VPSLLQZ128ri, VPSLLQri) +ENTRY(VPSLLQZ128rm, VPSLLQrm) +ENTRY(VPSLLQZ128rr, VPSLLQrr) +ENTRY(VPSRLQZ128ri, VPSRLQri) +ENTRY(VPSRLQZ128rm, VPSRLQrm) +ENTRY(VPSRLQZ128rr, VPSRLQrr) +ENTRY(VPSUBQZ128rm, VPSUBQrm) +ENTRY(VPSUBQZ128rr, VPSUBQrr) +ENTRY(VPUNPCKHQDQZ128rm, VPUNPCKHQDQrm) +ENTRY(VPUNPCKHQDQZ128rr, VPUNPCKHQDQrr) +ENTRY(VPUNPCKLQDQZ128rm, VPUNPCKLQDQrm) +ENTRY(VPUNPCKLQDQZ128rr, VPUNPCKLQDQrr) +ENTRY(VPXORQZ128rm, VPXORrm) +ENTRY(VPXORQZ128rr, VPXORrr) +ENTRY(VRNDSCALEPDZ128rmi, VROUNDPDm) +ENTRY(VRNDSCALEPDZ128rri, VROUNDPDr) +ENTRY(VRNDSCALESDZm, VROUNDSDm) +ENTRY(VRNDSCALESDZm_Int, VROUNDSDm_Int) +ENTRY(VRNDSCALESDZr, VROUNDSDr) +ENTRY(VRNDSCALESDZr_Int, VROUNDSDr_Int) +ENTRY(VSHUFPDZ128rmi, VSHUFPDrmi) +ENTRY(VSHUFPDZ128rri, VSHUFPDrri) +ENTRY(VSQRTPDZ128m, VSQRTPDm) +ENTRY(VSQRTPDZ128r, VSQRTPDr) +ENTRY(VSQRTSDZm, VSQRTSDm) +ENTRY(VSQRTSDZm_Int, VSQRTSDm_Int) +ENTRY(VSQRTSDZr, VSQRTSDr) +ENTRY(VSQRTSDZr_Int, VSQRTSDr_Int) +ENTRY(VSUBPDZ128rm, VSUBPDrm) +ENTRY(VSUBPDZ128rr, VSUBPDrr) +ENTRY(VSUBSDZrm, VSUBSDrm) +ENTRY(VSUBSDZrm_Int, VSUBSDrm_Int) +ENTRY(VSUBSDZrr, VSUBSDrr) +ENTRY(VSUBSDZrr_Int, VSUBSDrr_Int) +ENTRY(VUCOMISDZrm, VUCOMISDrm) +ENTRY(VUCOMISDZrm_Int, VUCOMISDrm_Int) +ENTRY(VUCOMISDZrr, VUCOMISDrr) +ENTRY(VUCOMISDZrr_Int, VUCOMISDrr_Int) +ENTRY(VUNPCKHPDZ128rm, VUNPCKHPDrm) +ENTRY(VUNPCKHPDZ128rr, VUNPCKHPDrr) +ENTRY(VUNPCKLPDZ128rm, 
VUNPCKLPDrm) +ENTRY(VUNPCKLPDZ128rr, VUNPCKLPDrr) +ENTRY(VXORPDZ128rm, VXORPDrm) +ENTRY(VXORPDZ128rr, VXORPDrr) +ENTRY(VADDPDZ256rm, VADDPDYrm) +ENTRY(VADDPDZ256rr, VADDPDYrr) +ENTRY(VANDNPDZ256rm, VANDNPDYrm) +ENTRY(VANDNPDZ256rr, VANDNPDYrr) +ENTRY(VANDPDZ256rm, VANDPDYrm) +ENTRY(VANDPDZ256rr, VANDPDYrr) +ENTRY(VCVTPD2DQZ256rm, VCVTPD2DQYrm) +ENTRY(VCVTPD2DQZ256rr, VCVTPD2DQYrr) +ENTRY(VCVTPD2PSZ256rm, VCVTPD2PSYrm) +ENTRY(VCVTPD2PSZ256rr, VCVTPD2PSYrr) +ENTRY(VCVTTPD2DQZ256rm, VCVTTPD2DQYrm) +ENTRY(VCVTTPD2DQZ256rr, VCVTTPD2DQYrr) +ENTRY(VDIVPDZ256rm, VDIVPDYrm) +ENTRY(VDIVPDZ256rr, VDIVPDYrr) +ENTRY(VEXTRACTF64x2Z256mr, VEXTRACTF128mr) +ENTRY(VEXTRACTF64x2Z256rr, VEXTRACTF128rr) +ENTRY(VEXTRACTI64x2Z256mr, VEXTRACTI128mr) +ENTRY(VEXTRACTI64x2Z256rr, VEXTRACTI128rr) +ENTRY(VINSERTF64x2Z256rm, VINSERTF128rm) +ENTRY(VINSERTF64x2Z256rr, VINSERTF128rr) +ENTRY(VINSERTI64x2Z256rm, VINSERTI128rm) +ENTRY(VINSERTI64x2Z256rr, VINSERTI128rr) +ENTRY(VMAXCPDZ256rm, VMAXCPDYrm) +ENTRY(VMAXCPDZ256rr, VMAXCPDYrr) +ENTRY(VMAXPDZ256rm, VMAXPDYrm) +ENTRY(VMAXPDZ256rr, VMAXPDYrr) +ENTRY(VMINCPDZ256rm, VMINCPDYrm) +ENTRY(VMINCPDZ256rr, VMINCPDYrr) +ENTRY(VMINPDZ256rm, VMINPDYrm) +ENTRY(VMINPDZ256rr, VMINPDYrr) +ENTRY(VMOVAPDZ256mr, VMOVAPDYmr) +ENTRY(VMOVAPDZ256rm, VMOVAPDYrm) +ENTRY(VMOVAPDZ256rr, VMOVAPDYrr) +ENTRY(VMOVDDUPZ256rm, VMOVDDUPYrm) +ENTRY(VMOVDDUPZ256rr, VMOVDDUPYrr) +ENTRY(VMOVDQA64Z256mr, VMOVDQAYmr) +ENTRY(VMOVDQA64Z256rm, VMOVDQAYrm) +ENTRY(VMOVDQA64Z256rr, VMOVDQAYrr) +ENTRY(VMOVDQU64Z256mr, VMOVDQUYmr) +ENTRY(VMOVDQU64Z256rm, VMOVDQUYrm) +ENTRY(VMOVDQU64Z256rr, VMOVDQUYrr) +ENTRY(VMOVNTPDZ256mr, VMOVNTPDYmr) +ENTRY(VMOVUPDZ256mr, VMOVUPDYmr) +ENTRY(VMOVUPDZ256rm, VMOVUPDYrm) +ENTRY(VMOVUPDZ256rr, VMOVUPDYrr) +ENTRY(VMULPDZ256rm, VMULPDYrm) +ENTRY(VMULPDZ256rr, VMULPDYrr) +ENTRY(VORPDZ256rm, VORPDYrm) +ENTRY(VORPDZ256rr, VORPDYrr) +ENTRY(VPADDQZ256rm, VPADDQYrm) +ENTRY(VPADDQZ256rr, VPADDQYrr) +ENTRY(VPANDNQZ256rm, VPANDNYrm) +ENTRY(VPANDNQZ256rr, VPANDNYrr) +ENTRY(VPANDQZ256rm, VPANDYrm) +ENTRY(VPANDQZ256rr, VPANDYrr) +ENTRY(VPERMILPDZ256mi, VPERMILPDYmi) +ENTRY(VPERMILPDZ256ri, VPERMILPDYri) +ENTRY(VPERMILPDZ256rm, VPERMILPDYrm) +ENTRY(VPERMILPDZ256rr, VPERMILPDYrr) +ENTRY(VPMULDQZ256rm, VPMULDQYrm) +ENTRY(VPMULDQZ256rr, VPMULDQYrr) +ENTRY(VPMULUDQZ256rm, VPMULUDQYrm) +ENTRY(VPMULUDQZ256rr, VPMULUDQYrr) +ENTRY(VPORQZ256rm, VPORYrm) +ENTRY(VPORQZ256rr, VPORYrr) +ENTRY(VPSLLQZ256ri, VPSLLQYri) +ENTRY(VPSLLQZ256rm, VPSLLQYrm) +ENTRY(VPSLLQZ256rr, VPSLLQYrr) +ENTRY(VPSRLQZ256ri, VPSRLQYri) +ENTRY(VPSRLQZ256rm, VPSRLQYrm) +ENTRY(VPSRLQZ256rr, VPSRLQYrr) +ENTRY(VPSUBQZ256rm, VPSUBQYrm) +ENTRY(VPSUBQZ256rr, VPSUBQYrr) +ENTRY(VPUNPCKHQDQZ256rm, VPUNPCKHQDQYrm) +ENTRY(VPUNPCKHQDQZ256rr, VPUNPCKHQDQYrr) +ENTRY(VPUNPCKLQDQZ256rm, VPUNPCKLQDQYrm) +ENTRY(VPUNPCKLQDQZ256rr, VPUNPCKLQDQYrr) +ENTRY(VPXORQZ256rm, VPXORYrm) +ENTRY(VPXORQZ256rr, VPXORYrr) +ENTRY(VRNDSCALEPDZ256rmi, VROUNDPDYm) +ENTRY(VRNDSCALEPDZ256rri, VROUNDPDYr) +ENTRY(VSHUFPDZ256rmi, VSHUFPDYrmi) +ENTRY(VSHUFPDZ256rri, VSHUFPDYrri) +ENTRY(VSQRTPDZ256m, VSQRTPDYm) +ENTRY(VSQRTPDZ256r, VSQRTPDYr) +ENTRY(VSUBPDZ256rm, VSUBPDYrm) +ENTRY(VSUBPDZ256rr, VSUBPDYrr) +ENTRY(VUNPCKHPDZ256rm, VUNPCKHPDYrm) +ENTRY(VUNPCKHPDZ256rr, VUNPCKHPDYrr) +ENTRY(VUNPCKLPDZ256rm, VUNPCKLPDYrm) +ENTRY(VUNPCKLPDZ256rr, VUNPCKLPDYrr) +ENTRY(VXORPDZ256rm, VXORPDYrm) +ENTRY(VXORPDZ256rr, VXORPDYrr) +ENTRY(VPBROADCASTQZ128rm, VPBROADCASTQrm) +ENTRY(VPBROADCASTQZ128rr, VPBROADCASTQrr) +ENTRY(VBROADCASTF64X2Z128rm, VBROADCASTF128rm) +ENTRY(VBROADCASTI64X2Z128rm, 
VBROADCASTI128rm) +ENTRY(VBROADCASTSDZ256rm, VBROADCASTSDYrm) +ENTRY(VBROADCASTSDZ256rr, VBROADCASTSDYrr) +ENTRY(VPBROADCASTQZ256rm, VPBROADCASTQYrm) +ENTRY(VPBROADCASTQZ256rr, VPBROADCASTQYrr) +#undef ENTRY diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 3dbf8c71c48d..c287a31e0b1b 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1192,6 +1192,9 @@ extern void __kmp_init_target_task(); // Minimum stack size for pthread for VE is 4MB. // https://www.hpc.nec/documents/veos/en/glibc/Difference_Points_glibc.htm #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) +#elif KMP_OS_AIX +// The default stack size for worker threads on AIX is 4MB. +#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) #else #define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) #endif @@ -1354,6 +1357,10 @@ extern kmp_uint64 __kmp_now_nsec(); /* TODO: tune for KMP_OS_WASI */ #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#elif KMP_OS_AIX +/* TODO: tune for KMP_OS_AIX */ +#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index 5f04301c91c6..b0cd0ed296e7 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -100,7 +100,7 @@ #define ENABLE_LIBOMPTARGET OPENMP_ENABLE_LIBOMPTARGET // Configured cache line based on architecture -#if KMP_ARCH_PPC64 +#if KMP_ARCH_PPC64 || KMP_ARCH_PPC # define CACHE_LINE 128 #elif KMP_ARCH_AARCH64_A64FX # define CACHE_LINE 256 diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index d54c5bfd10fe..713561734c48 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -582,7 +582,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) { int gtid; #if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS + KMP_OS_OPENBSD || KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_AIX gtid = __kmp_entry_gtid(); #elif KMP_OS_WINDOWS if (!__kmp_init_parallel || diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index b132f38fd3b0..5017cd3de4be 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -172,7 +172,7 @@ int __kmp_ncores = 0; int __kmp_chunk = 0; int __kmp_force_monotonic = 0; int __kmp_abort_delay = 0; -#if KMP_OS_LINUX && defined(KMP_TDATA_GTID) +#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_TDATA_GTID) int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */ int __kmp_adjust_gtid_mode = FALSE; #elif KMP_OS_WINDOWS diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index 78af39533549..88189659a234 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -357,7 +357,8 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) { // They come in two flavors: 64-bit unsigned, and either 32-bit signed // (IA-32 architecture) or 64-bit signed (Intel(R) 64). 
-#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM +#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \ + KMP_ARCH_PPC #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 #define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 4ffe9f2d8c95..6862fd89b630 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -176,7 +176,8 @@ typedef unsigned long long kmp_uint64; #define KMP_UINT64_SPEC "llu" #endif /* KMP_OS_UNIX */ -#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM +#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \ + KMP_ARCH_PPC #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ @@ -186,7 +187,7 @@ typedef unsigned long long kmp_uint64; #error "Can't determine size_t printf format specifier." #endif -#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_WASM +#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_WASM || KMP_ARCH_PPC #define KMP_SIZE_T_MAX (0xFFFFFFFF) #else #define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) @@ -1046,7 +1047,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_VE || KMP_ARCH_S390X + KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC #if KMP_OS_WINDOWS #undef KMP_MB #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) @@ -1146,7 +1147,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), \ (kmp_int64)(b), (kmp_int64)(c)) -#if KMP_ARCH_X86 || KMP_ARCH_MIPS || KMP_ARCH_WASM +#if KMP_ARCH_X86 || KMP_ARCH_MIPS || KMP_ARCH_WASM || KMP_ARCH_PPC // What about ARM? 
#define TCR_PTR(a) ((void *)TCR_4(a)) #define TCW_PTR(a, b) TCW_4((a), (b)) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index 45f411b9c219..c06f46db2d49 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -82,15 +82,20 @@ #define KMP_OS_WASI 1 #endif +#if (defined _AIX) +#undef KMP_OS_AIX +#define KMP_OS_AIX 1 +#endif + #if (1 != KMP_OS_LINUX + KMP_OS_DRAGONFLY + KMP_OS_FREEBSD + KMP_OS_NETBSD + \ KMP_OS_OPENBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS + KMP_OS_HURD + \ - KMP_OS_SOLARIS + KMP_OS_WASI) + KMP_OS_SOLARIS + KMP_OS_WASI + KMP_OS_AIX) #error Unknown OS #endif #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_DARWIN || KMP_OS_HURD || KMP_OS_SOLARIS || \ - KMP_OS_WASI + KMP_OS_WASI || KMP_OS_AIX #undef KMP_OS_UNIX #define KMP_OS_UNIX 1 #endif @@ -102,7 +107,8 @@ #define KMP_ARCH_AARCH64 0 #define KMP_ARCH_PPC64_ELFv1 0 #define KMP_ARCH_PPC64_ELFv2 0 -#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_ELFv2 || KMP_ARCH_PPC64_ELFv1) +#define KMP_ARCH_PPC64_XCOFF 0 +#define KMP_ARCH_PPC_XCOFF 0 #define KMP_ARCH_MIPS 0 #define KMP_ARCH_MIPS64 0 #define KMP_ARCH_RISCV64 0 @@ -134,13 +140,23 @@ #undef KMP_ARCH_X86 #define KMP_ARCH_X86 1 #elif defined __powerpc64__ -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#if defined(_CALL_ELF) +#if _CALL_ELF == 2 #undef KMP_ARCH_PPC64_ELFv2 #define KMP_ARCH_PPC64_ELFv2 1 #else #undef KMP_ARCH_PPC64_ELFv1 #define KMP_ARCH_PPC64_ELFv1 1 #endif +#elif defined KMP_OS_AIX +#undef KMP_ARCH_PPC64_XCOFF +#define KMP_ARCH_PPC64_XCOFF 1 +#endif +#elif defined(__powerpc__) && defined(KMP_OS_AIX) +#undef KMP_ARCH_PPC_XCOFF +#define KMP_ARCH_PPC_XCOFF 1 +#undef KMP_ARCH_PPC +#define KMP_ARCH_PPC 1 #elif defined __aarch64__ #undef KMP_ARCH_AARCH64 #define KMP_ARCH_AARCH64 1 @@ -207,6 +223,9 @@ #define KMP_ARCH_WASM 1 #endif +#define KMP_ARCH_PPC64 \ + (KMP_ARCH_PPC64_ELFv2 || KMP_ARCH_PPC64_ELFv1 || KMP_ARCH_PPC64_XCOFF) + #if defined(__MIC__) || defined(__MIC2__) #define KMP_MIC 1 #if __MIC2__ || __KNC__ @@ -224,7 +243,8 @@ /* Specify 32 bit architectures here */ #define KMP_32_BIT_ARCH \ - (KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM) + (KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \ + KMP_ARCH_PPC) // Platforms which support Intel(R) Many Integrated Core Architecture #define KMP_MIC_SUPPORTED \ @@ -234,7 +254,7 @@ #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_VE + \ - KMP_ARCH_S390X + KMP_ARCH_WASM) + KMP_ARCH_S390X + KMP_ARCH_WASM + KMP_ARCH_PPC) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 4e1074a893a2..fc5e8405a415 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8901,7 +8901,7 @@ __kmp_determine_reduction_method( #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \ - KMP_OS_SOLARIS || KMP_OS_WASI + KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX int teamsize_cutoff = 4; @@ -8926,14 +8926,14 @@ __kmp_determine_reduction_method( #error "Unknown or unsupported OS" #endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || - // KMP_OS_SOLARIS || KMP_OS_WASI + // 
KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \ - KMP_ARCH_WASM + KMP_ARCH_WASM || KMP_ARCH_PPC #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \ - KMP_OS_WASI + KMP_OS_WASI || KMP_OS_AIX // basic tuning diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index e731bf45e8ee..30a4c05fe76b 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -6171,9 +6171,9 @@ void __kmp_env_initialize(char const *string) { // specifier, even as substrings. // // I can't find a case-insensitive version of strstr on Windows* OS. - // Use the case-sensitive version for now. + // Use the case-sensitive version for now. AIX does the same. -#if KMP_OS_WINDOWS +#if KMP_OS_WINDOWS || KMP_OS_AIX #define FIND strstr #else #define FIND strcasestr diff --git a/openmp/runtime/src/kmp_wrapper_getpid.h b/openmp/runtime/src/kmp_wrapper_getpid.h index f9d7f4804fbc..d31c6e80f75d 100644 --- a/openmp/runtime/src/kmp_wrapper_getpid.h +++ b/openmp/runtime/src/kmp_wrapper_getpid.h @@ -17,7 +17,9 @@ // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard // headers. +#if !defined(KMP_OS_AIX) #include <sys/syscall.h> +#endif #include <sys/types.h> #include <unistd.h> #if KMP_OS_DARWIN @@ -31,6 +33,9 @@ #define __kmp_gettid() _lwp_self() #elif KMP_OS_OPENBSD #define __kmp_gettid() getthrid() +#elif KMP_OS_AIX +#include <pthread.h> +#define __kmp_gettid() pthread_self() #elif defined(SYS_gettid) // Hopefully other Unix systems define SYS_gettid syscall for getting os thread // id diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index c2df8895e887..f01fa647c4d4 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -29,7 +29,9 @@ #include <semaphore.h> #endif // KMP_OS_LINUX #include <sys/resource.h> +#if !KMP_OS_AIX #include <sys/syscall.h> +#endif #include <sys/time.h> #include <sys/times.h> #include <unistd.h> @@ -1832,7 +1834,7 @@ static int __kmp_get_xproc(void) { __kmp_type_convert(sysconf(_SC_NPROCESSORS_CONF), &(r)); #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD || \ - KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI + KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX __kmp_type_convert(sysconf(_SC_NPROCESSORS_ONLN), &(r)); @@ -2210,9 +2212,9 @@ int __kmp_is_address_mapped(void *addr) { } #elif KMP_OS_WASI found = (int)addr < (__builtin_wasm_memory_size(0) * PAGESIZE); -#elif KMP_OS_DRAGONFLY || KMP_OS_SOLARIS +#elif KMP_OS_DRAGONFLY || KMP_OS_SOLARIS || KMP_OS_AIX - // FIXME(DragonFly, Solaris): Implement this + // FIXME(DragonFly, Solaris, AIX): Implement this found = 1; #else @@ -2317,7 +2319,7 @@ int __kmp_get_load_balance(int max) { // Open "/proc/" directory. proc_dir = opendir("/proc"); if (proc_dir == NULL) { - // Cannot open "/prroc/". Probably the kernel does not support it. Return an + // Cannot open "/proc/". Probably the kernel does not support it. Return an // error now and in subsequent calls. running_threads = -1; permanent_error = 1; @@ -2330,9 +2332,14 @@ int __kmp_get_load_balance(int max) { proc_entry = readdir(proc_dir); while (proc_entry != NULL) { +#if KMP_OS_AIX + // Proc entry name starts with a digit. Assume it is a process' directory. 
+ if (isdigit(proc_entry->d_name[0])) { +#else // Proc entry is a directory and name starts with a digit. Assume it is a // process' directory. if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) { +#endif #ifdef KMP_DEBUG ++total_processes; @@ -2376,7 +2383,11 @@ int __kmp_get_load_balance(int max) { task_entry = readdir(task_dir); while (task_entry != NULL) { // It is a directory and name starts with a digit. +#if KMP_OS_AIX + if (isdigit(task_entry->d_name[0])) { +#else if (proc_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) { +#endif // Construct complete stat file path. Easiest way would be: // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, @@ -2486,7 +2497,7 @@ finish: // Clean up and exit. #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X) + KMP_ARCH_ARM || KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC_XCOFF) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function |
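The __kmp_get_load_balance() change above drops the proc_entry->d_type == DT_DIR check on AIX and keeps only the leading-digit name test, presumably because AIX's struct dirent does not expose a d_type field. A minimal stand-alone sketch of the same decision (is_process_dir is a hypothetical helper for illustration, not a function introduced by this patch):

#include <ctype.h>
#include <dirent.h>

// Decide whether a /proc entry looks like a process directory.
// On AIX, dirent has no d_type member, so only the name can be checked;
// elsewhere the stronger DT_DIR test is kept, as in the patch.
static bool is_process_dir(const struct dirent *entry) {
#if defined(_AIX)
  return isdigit((unsigned char)entry->d_name[0]) != 0;
#else
  return entry->d_type == DT_DIR &&
         isdigit((unsigned char)entry->d_name[0]) != 0;
#endif
}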
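The NOCOMP/ENTRY lists in X86ManualCompressEVEXTables.def above are written in the usual X-macro style: each macro has an empty #ifndef default and is #undef'ed at the end of the file, so the emitter can include the file more than once with different macro definitions to build its NoCompressSet and ManualMap lookups. A sketch of that construction, assuming containers keyed by instruction name (the actual definitions appear elsewhere in X86CompressEVEXTablesEmitter.cpp and may differ in detail):

#include <map>
#include <set>
#include "llvm/ADT/StringRef.h"

// First include: expand only the ENTRY(OLD, NEW) pairs into the manual map;
// NOCOMP falls back to its empty default inside the .def file.
static const std::map<llvm::StringRef, llvm::StringRef> ManualMap = {
#define ENTRY(OLD, NEW) {#OLD, #NEW},
#include "X86ManualCompressEVEXTables.def"
};

// Second include: expand only the NOCOMP(INSN) names into the exclusion set;
// ENTRY now falls back to its empty default.
static const std::set<llvm::StringRef> NoCompressSet = {
#define NOCOMP(INSN) #INSN,
#include "X86ManualCompressEVEXTables.def"
};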