diff options
83 files changed, 3235 insertions, 1019 deletions
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 1dac5d2371d4..634bcaed20a6 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -243,6 +243,9 @@ TARGET_HEADER_BUILTIN(_ReadStatusReg, "LLii", "nh", "intrin.h", ALL_MS_LANGUAG TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__mulh, "SLLiSLLiSLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 71cf0c65e692..b60b94a1ba08 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -354,6 +354,9 @@ public: /// A list of all -fno-builtin-* function names (e.g., memset). std::vector<std::string> NoBuiltinFuncs; + /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE(). + std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap; + /// Triples of the OpenMP targets that the host code codegen should /// take into account in order to generate accurate offloading descriptors. std::vector<llvm::Triple> OMPTargetTriples; @@ -460,6 +463,9 @@ public: } bool isSYCL() const { return SYCLIsDevice || SYCLIsHost; } + + /// Remap path prefix according to -fmacro-prefix-map option. 
+ void remapPathPrefix(SmallString<256> &Path) const; }; /// Floating point control options diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ab1a5487d9c0..a0cbcae0bdc3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2825,10 +2825,10 @@ def fcoverage_prefix_map_EQ HelpText<"remap file source paths in coverage mapping">; def ffile_prefix_map_EQ : Joined<["-"], "ffile-prefix-map=">, Group<f_Group>, - HelpText<"remap file source paths in debug info and predefined preprocessor macros">; + HelpText<"remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE()">; def fmacro_prefix_map_EQ - : Joined<["-"], "fmacro-prefix-map=">, Group<Preprocessor_Group>, Flags<[CC1Option]>, - HelpText<"remap file source paths in predefined preprocessor macros">; + : Joined<["-"], "fmacro-prefix-map=">, Group<f_Group>, Flags<[CC1Option]>, + HelpText<"remap file source paths in predefined preprocessor macros and __builtin_FILE()">; defm force_dwarf_frame : BoolFOption<"force-dwarf-frame", CodeGenOpts<"ForceDwarfFrameSection">, DefaultFalse, PosFlag<SetTrue, [CC1Option], "Always emit a debug frame section">, NegFlag<SetFalse>>; diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 99085b98fc7a..a7aabc3e1df2 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -199,9 +199,6 @@ public: /// build it again. std::shared_ptr<FailedModulesSet> FailedModules; - /// A prefix map for __FILE__ and __BASE_FILE__. - std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap; - /// Contains the currently active skipped range mappings for skipping excluded /// conditional directives. 
/// diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 83a2d132bf6a..d8b2546b81a3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7828,8 +7828,7 @@ public: TemplateArgumentLoc &Arg, SmallVectorImpl<TemplateArgument> &Converted); - bool CheckTemplateArgument(TemplateTypeParmDecl *Param, - TypeSourceInfo *Arg); + bool CheckTemplateArgument(TypeSourceInfo *Arg); ExprResult CheckTemplateArgument(NonTypeTemplateParmDecl *Param, QualType InstantiatedParamType, Expr *Arg, TemplateArgument &Converted, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index e102a3ba508d..fdba204fbe7f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -6066,9 +6066,11 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const { NNS->getAsNamespaceAlias()->getNamespace() ->getOriginalNamespace()); + // The difference between TypeSpec and TypeSpecWithTemplate is that the + // latter will have the 'template' keyword when printed. case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: { - QualType T = getCanonicalType(QualType(NNS->getAsType(), 0)); + const Type *T = getCanonicalType(NNS->getAsType()); // If we have some kind of dependent-named type (e.g., "typename T::type"), // break it apart into its prefix and identifier, then reconsititute those @@ -6078,14 +6080,16 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const { // typedef typename T::type T1; // typedef typename T1::type T2; if (const auto *DNT = T->getAs<DependentNameType>()) - return NestedNameSpecifier::Create(*this, DNT->getQualifier(), - const_cast<IdentifierInfo *>(DNT->getIdentifier())); - - // Otherwise, just canonicalize the type, and force it to be a TypeSpec. - // FIXME: Why are TypeSpec and TypeSpecWithTemplate distinct in the - // first place? 
+ return NestedNameSpecifier::Create( + *this, DNT->getQualifier(), + const_cast<IdentifierInfo *>(DNT->getIdentifier())); + if (const auto *DTST = T->getAs<DependentTemplateSpecializationType>()) + return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true, + const_cast<Type *>(T)); + + // TODO: Set 'Template' parameter to true for other template types. return NestedNameSpecifier::Create(*this, nullptr, false, - const_cast<Type *>(T.getTypePtr())); + const_cast<Type *>(T)); } case NestedNameSpecifier::Global: diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index e8b4aaa2b81e..11f10d4695fc 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2233,8 +2233,11 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx, }; switch (getIdentKind()) { - case SourceLocExpr::File: - return MakeStringLiteral(PLoc.getFilename()); + case SourceLocExpr::File: { + SmallString<256> Path(PLoc.getFilename()); + Ctx.getLangOpts().remapPathPrefix(Path); + return MakeStringLiteral(Path); + } case SourceLocExpr::Function: { const Decl *CurDecl = dyn_cast_or_null<Decl>(Context); return MakeStringLiteral( diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index dc392d5352aa..bebf3178426f 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/LangOptions.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Path.h" using namespace clang; @@ -48,6 +50,12 @@ VersionTuple LangOptions::getOpenCLVersionTuple() const { return VersionTuple(Ver / 100, (Ver % 100) / 10); } +void LangOptions::remapPathPrefix(SmallString<256> &Path) const { + for (const auto &Entry : MacroPrefixMap) + if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second)) + break; +} + FPOptions FPOptions::defaultWithoutTrailingStorage(const LangOptions &LO) { FPOptions result(LO); 
return result; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d9b2a5fe16be..1a02965b223e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9732,6 +9732,29 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } + if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) { + llvm::Type *ResType = ConvertType(E->getType()); + llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); + + bool IsSigned = BuiltinID == AArch64::BI__mulh; + Value *LHS = + Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned); + Value *RHS = + Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned); + + Value *MulResult, *HigherBits; + if (IsSigned) { + MulResult = Builder.CreateNSWMul(LHS, RHS); + HigherBits = Builder.CreateAShr(MulResult, 64); + } else { + MulResult = Builder.CreateNUWMul(LHS, RHS); + HigherBits = Builder.CreateLShr(MulResult, 64); + } + HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); + + return HigherBits; + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. 
if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index d43fb99550a8..553fedebfe56 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -555,7 +555,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, PrioritizedCXXGlobalInits.size()); PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn)); } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) || - getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) { + getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR || + D->hasAttr<SelectAnyAttr>()) { // C++ [basic.start.init]p2: // Definitions of explicitly specialized class template static data // members have ordered initialization. Other class template static data @@ -568,17 +569,18 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // group with the global being initialized. On most platforms, this is a // minor startup time optimization. In the MS C++ ABI, there are no guard // variables, so this COMDAT key is required for correctness. - AddGlobalCtor(Fn, 65535, COMDATKey); - if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) { - // In The MS C++, MS add template static data member in the linker - // drective. - addUsedGlobal(COMDATKey); - } - } else if (D->hasAttr<SelectAnyAttr>()) { + // // SelectAny globals will be comdat-folded. Put the initializer into a // COMDAT group associated with the global, so the initializers get folded // too. + AddGlobalCtor(Fn, 65535, COMDATKey); + if (COMDATKey && (getTriple().isOSBinFormatELF() || + getTarget().getCXXABI().isMicrosoft())) { + // When COMDAT is used on ELF or in the MS C++ ABI, the key must be in + // llvm.used to prevent linker GC. + addUsedGlobal(COMDATKey); + } } else { I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash. 
if (I == DelayedCXXInitPosition.end()) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9b40b88ea3c9..49a1396b58e3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -186,7 +186,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, !getModule().getSourceFileName().empty()) { std::string Path = getModule().getSourceFileName(); // Check if a path substitution is needed from the MacroPrefixMap. - for (const auto &Entry : PPO.MacroPrefixMap) + for (const auto &Entry : LangOpts.MacroPrefixMap) if (Path.rfind(Entry.first, 0) != std::string::npos) { Path = Entry.second + Path.substr(Entry.first.size()); break; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1870bd81789c..4c8ba8cdcd29 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2637,7 +2637,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath; llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math; - StringRef FPContract = "on"; + StringRef FPContract = ""; bool StrictFPModel = false; @@ -2662,7 +2662,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = false; SignedZeros = true; // -fno_fast_math restores default denormal and fpcontract handling - FPContract = "on"; + FPContract = ""; DenormalFPMath = llvm::DenormalMode::getIEEE(); // FIXME: The target may have picked a non-IEEE default mode here based on @@ -2682,18 +2682,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // ffp-model= is a Driver option, it is entirely rewritten into more // granular options before being passed into cc1. // Use the gcc option in the switch below. 
- if (!FPModel.empty() && !FPModel.equals(Val)) + if (!FPModel.empty() && !FPModel.equals(Val)) { D.Diag(clang::diag::warn_drv_overriding_flag_option) << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); + FPContract = ""; + } if (Val.equals("fast")) { optID = options::OPT_ffast_math; FPModel = Val; - FPContract = Val; + FPContract = "fast"; } else if (Val.equals("precise")) { optID = options::OPT_ffp_contract; FPModel = Val; - FPContract = "on"; + FPContract = "fast"; PreciseFPModel = true; } else if (Val.equals("strict")) { StrictFPModel = true; @@ -2779,11 +2781,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_ffp_contract: { StringRef Val = A->getValue(); if (PreciseFPModel) { - // When -ffp-model=precise is seen on the command line, - // the boolean PreciseFPModel is set to true which indicates - // "the current option is actually PreciseFPModel". The optID - // is changed to OPT_ffp_contract and FPContract is set to "on". - // the argument Val string is "precise": it shouldn't be checked. + // -ffp-model=precise enables ffp-contract=fast as a side effect + // the FPContract value has already been set to a string literal + // and the Val string isn't a pertinent value. ; } else if (Val.equals("fast") || Val.equals("on") || Val.equals("off")) FPContract = Val; @@ -2881,17 +2881,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // -fno_fast_math restores default denormal and fpcontract handling DenormalFPMath = DefaultDenormalFPMath; DenormalFP32Math = llvm::DenormalMode::getIEEE(); - FPContract = "on"; + FPContract = ""; break; } if (StrictFPModel) { // If -ffp-model=strict has been specified on command line but // subsequent options conflict then emit warning diagnostic. 
- if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath && - SignedZeros && TrappingMath && RoundingFPMath && - DenormalFPMath == llvm::DenormalMode::getIEEE() && - DenormalFP32Math == llvm::DenormalMode::getIEEE() && - FPContract.equals("off")) + if (HonorINFs && HonorNaNs && + !AssociativeMath && !ReciprocalMath && + SignedZeros && TrappingMath && RoundingFPMath && + (FPContract.equals("off") || FPContract.empty()) && + DenormalFPMath == llvm::DenormalMode::getIEEE() && + DenormalFP32Math == llvm::DenormalMode::getIEEE()) // OK: Current Arg doesn't conflict with -ffp-model=strict ; else { @@ -7690,8 +7691,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, assert(CurTC == nullptr && "Expected one dependence!"); CurTC = TC; }); + UB += C.addTempFile( + C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I]))); + } else { + UB += CurTC->getInputFilename(Inputs[I]); } - UB += CurTC->getInputFilename(Inputs[I]); } CmdArgs.push_back(TCArgs.MakeArgString(UB)); diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 828bfdbb05a3..314d0efce441 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -588,21 +588,43 @@ void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs, void HexagonToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(options::OPT_nostdinc) || - DriverArgs.hasArg(options::OPT_nostdlibinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; + const bool IsELF = !getTriple().isMusl() && !getTriple().isOSLinux(); + const bool IsLinuxMusl = getTriple().isMusl() && getTriple().isOSLinux(); + const Driver &D = getDriver(); - if (!D.SysRoot.empty()) { + SmallString<128> ResourceDirInclude(D.ResourceDir); + if (!IsELF) { + llvm::sys::path::append(ResourceDirInclude, "include"); + if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && + 
(!IsLinuxMusl || DriverArgs.hasArg(options::OPT_nostdlibinc))) + addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude); + } + if (DriverArgs.hasArg(options::OPT_nostdlibinc)) + return; + + const bool HasSysRoot = !D.SysRoot.empty(); + if (HasSysRoot) { SmallString<128> P(D.SysRoot); - if (getTriple().isMusl()) + if (IsLinuxMusl) llvm::sys::path::append(P, "usr/include"); else llvm::sys::path::append(P, "include"); + addExternCSystemInclude(DriverArgs, CC1Args, P.str()); - return; + // LOCAL_INCLUDE_DIR + addSystemInclude(DriverArgs, CC1Args, P + "/usr/local/include"); + // TOOL_INCLUDE_DIR + AddMultilibIncludeArgs(DriverArgs, CC1Args); } + if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && IsLinuxMusl) + addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude); + + if (HasSysRoot) + return; std::string TargetDir = getHexagonTargetDir(D.getInstalledDir(), D.PrefixDirs); addExternCSystemInclude(DriverArgs, CC1Args, TargetDir + "/hexagon/include"); diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 20efbdc237a8..7ba729f36bd8 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -136,10 +136,13 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, llvm_unreachable("Unsupported target architecture."); } - if (Args.hasArg(options::OPT_mwindows)) { + Arg *SubsysArg = + Args.getLastArg(options::OPT_mwindows, options::OPT_mconsole); + if (SubsysArg && SubsysArg->getOption().matches(options::OPT_mwindows)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("windows"); - } else if (Args.hasArg(options::OPT_mconsole)) { + } else if (SubsysArg && + SubsysArg->getOption().matches(options::OPT_mconsole)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("console"); } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 33e5f3e99c45..7025028bc94a 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp 
+++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3528,6 +3528,9 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Args, OPT_fexperimental_relative_cxx_abi_vtables, SA); else GenerateArg(Args, OPT_fno_experimental_relative_cxx_abi_vtables, SA); + + for (const auto &MP : Opts.MacroPrefixMap) + GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA); } bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, @@ -4037,6 +4040,12 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, options::OPT_fno_experimental_relative_cxx_abi_vtables, TargetCXXABI::usesRelativeVTables(T)); + for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) { + auto Split = StringRef(A).split('='); + Opts.MacroPrefixMap.insert( + {std::string(Split.first), std::string(Split.second)}); + } + return Diags.getNumErrors() == NumErrorsBefore; } @@ -4109,9 +4118,6 @@ static void GeneratePreprocessorArgs(PreprocessorOptions &Opts, for (const auto &D : Opts.DeserializedPCHDeclsToErrorOn) GenerateArg(Args, OPT_error_on_deserialized_pch_decl, D, SA); - for (const auto &MP : Opts.MacroPrefixMap) - GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA); - if (Opts.PrecompiledPreambleBytes != std::make_pair(0u, false)) GenerateArg(Args, OPT_preamble_bytes_EQ, Twine(Opts.PrecompiledPreambleBytes.first) + "," + @@ -4180,12 +4186,6 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl)) Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue()); - for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) { - auto Split = StringRef(A).split('='); - Opts.MacroPrefixMap.insert( - {std::string(Split.first), std::string(Split.second)}); - } - if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) { StringRef Value(A->getValue()); size_t Comma = Value.find(','); diff --git 
a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index ff8eb8fca268..34ec79d6acbc 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -574,6 +574,9 @@ void _WriteStatusReg(int, __int64); unsigned short __cdecl _byteswap_ushort(unsigned short val); unsigned long __cdecl _byteswap_ulong (unsigned long val); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val); + +__int64 __mulh(__int64 __a, __int64 __b); +unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b); #endif /*----------------------------------------------------------------------------*\ diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 8728ac9e2166..d8ad9d845e7a 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1453,15 +1453,6 @@ static bool isTargetEnvironment(const TargetInfo &TI, return TI.getTriple().getEnvironment() == Env.getEnvironment(); } -static void remapMacroPath( - SmallString<256> &Path, - const std::map<std::string, std::string, std::greater<std::string>> - &MacroPrefixMap) { - for (const auto &Entry : MacroPrefixMap) - if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second)) - break; -} - /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded /// as a builtin macro, handle it and return the next token as 'Tok'. 
void Preprocessor::ExpandBuiltinMacro(Token &Tok) { @@ -1543,7 +1534,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { } else { FN += PLoc.getFilename(); } - remapMacroPath(FN, PPOpts->MacroPrefixMap); + getLangOpts().remapPathPrefix(FN); Lexer::Stringify(FN); OS << '"' << FN << '"'; } diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index f2c70d0a56ef..931c9e3e2738 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -742,22 +742,15 @@ Optional<NormalizedConstraint> NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D, ArrayRef<const Expr *> E) { assert(E.size() != 0); - auto First = fromConstraintExpr(S, D, E[0]); - if (E.size() == 1) - return First; - auto Second = fromConstraintExpr(S, D, E[1]); - if (!Second) + auto Conjunction = fromConstraintExpr(S, D, E[0]); + if (!Conjunction) return None; - llvm::Optional<NormalizedConstraint> Conjunction; - Conjunction.emplace(S.Context, std::move(*First), std::move(*Second), - CCK_Conjunction); - for (unsigned I = 2; I < E.size(); ++I) { + for (unsigned I = 1; I < E.size(); ++I) { auto Next = fromConstraintExpr(S, D, E[I]); if (!Next) - return llvm::Optional<NormalizedConstraint>{}; - NormalizedConstraint NewConjunction(S.Context, std::move(*Conjunction), + return None; + *Conjunction = NormalizedConstraint(S.Context, std::move(*Conjunction), std::move(*Next), CCK_Conjunction); - *Conjunction = std::move(NewConjunction); } return Conjunction; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 83c97626ff7e..da4f4f862095 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -12472,6 +12472,8 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc, return false; } + const NestedNameSpecifier *CNNS = + Context.getCanonicalNestedNameSpecifier(Qual); for (LookupResult::iterator I = Prev.begin(), E = Prev.end(); I != E; ++I) { NamedDecl *D = *I; @@ -12497,8 +12499,7 @@ bool 
Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc, // using decls differ if they name different scopes (but note that // template instantiation can cause this check to trigger when it // didn't before instantiation). - if (Context.getCanonicalNestedNameSpecifier(Qual) != - Context.getCanonicalNestedNameSpecifier(DQual)) + if (CNNS != Context.getCanonicalNestedNameSpecifier(DQual)) continue; Diag(NameLoc, diag::err_using_decl_redeclaration) << SS.getRange(); diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 175388198324..5d26f2d2c11a 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1079,7 +1079,7 @@ NamedDecl *Sema::ActOnTypeParameter(Scope *S, bool Typename, return Param; // Check the template argument itself. - if (CheckTemplateArgument(Param, DefaultTInfo)) { + if (CheckTemplateArgument(DefaultTInfo)) { Param->setInvalidDecl(); return Param; } @@ -5042,7 +5042,7 @@ bool Sema::CheckTemplateTypeArgument(TemplateTypeParmDecl *Param, } } - if (CheckTemplateArgument(Param, TSI)) + if (CheckTemplateArgument(TSI)) return true; // Add the converted template type argument. @@ -5661,7 +5661,7 @@ bool Sema::CheckTemplateArgumentList( TemplateArgumentListInfo NewArgs = TemplateArgs; // Make sure we get the template parameter list from the most - // recentdeclaration, since that is the only one that has is guaranteed to + // recent declaration, since that is the only one that is guaranteed to // have all the default template argument information. TemplateParameterList *Params = cast<TemplateDecl>(Template->getMostRecentDecl()) @@ -6208,8 +6208,7 @@ bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier( /// /// This routine implements the semantics of C++ [temp.arg.type]. It /// returns true if an error occurred, and false otherwise. 
-bool Sema::CheckTemplateArgument(TemplateTypeParmDecl *Param, - TypeSourceInfo *ArgInfo) { +bool Sema::CheckTemplateArgument(TypeSourceInfo *ArgInfo) { assert(ArgInfo && "invalid TypeSourceInfo"); QualType Arg = ArgInfo->getType(); SourceRange SR = ArgInfo->getTypeLoc().getSourceRange(); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index f18f77d3442a..74889aa3ca88 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1934,25 +1934,23 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) { return Req; Sema::SFINAETrap Trap(SemaRef); - TemplateDeductionInfo Info(Req->getExpr()->getBeginLoc()); llvm::PointerUnion<Expr *, concepts::Requirement::SubstitutionDiagnostic *> TransExpr; if (Req->isExprSubstitutionFailure()) TransExpr = Req->getExprSubstitutionDiagnostic(); else { - Sema::InstantiatingTemplate ExprInst(SemaRef, Req->getExpr()->getBeginLoc(), - Req, Info, - Req->getExpr()->getSourceRange()); + Expr *E = Req->getExpr(); + TemplateDeductionInfo Info(E->getBeginLoc()); + Sema::InstantiatingTemplate ExprInst(SemaRef, E->getBeginLoc(), Req, Info, + E->getSourceRange()); if (ExprInst.isInvalid()) return nullptr; - ExprResult TransExprRes = TransformExpr(Req->getExpr()); + ExprResult TransExprRes = TransformExpr(E); if (TransExprRes.isInvalid() || Trap.hasErrorOccurred()) - TransExpr = createSubstDiag(SemaRef, Info, - [&] (llvm::raw_ostream& OS) { - Req->getExpr()->printPretty(OS, nullptr, - SemaRef.getPrintingPolicy()); - }); + TransExpr = createSubstDiag(SemaRef, Info, [&](llvm::raw_ostream &OS) { + E->printPretty(OS, nullptr, SemaRef.getPrintingPolicy()); + }); else TransExpr = TransExprRes.get(); } @@ -1966,6 +1964,7 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) { else if (RetReq.isTypeConstraint()) { TemplateParameterList *OrigTPL = RetReq.getTypeConstraintTemplateParameterList(); + 
TemplateDeductionInfo Info(OrigTPL->getTemplateLoc()); Sema::InstantiatingTemplate TPLInst(SemaRef, OrigTPL->getTemplateLoc(), Req, Info, OrigTPL->getSourceRange()); if (TPLInst.isInvalid()) diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 08a642469627..7d2097cfc297 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) @@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) -INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 6 +#define INSTR_PROF_RAW_VERSION 7 /* Indexed profile format version (start from 1). */ #define INSTR_PROF_INDEX_VERSION 7 /* Coverage mapping format version (start from 0). 
*/ diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c index 21fa7ba1ddd6..68b4f5cd6f52 100644 --- a/compiler-rt/lib/profile/InstrProfilingBuffer.c +++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c @@ -116,7 +116,7 @@ uint64_t __llvm_profile_get_size_for_buffer_internal( DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, &PaddingBytesAfterNames); - return sizeof(__llvm_profile_header) + + return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) + (DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters + (CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames; diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c index 913228513259..16ebc2f8b2a9 100644 --- a/compiler-rt/lib/profile/InstrProfilingMerge.c +++ b/compiler-rt/lib/profile/InstrProfilingMerge.c @@ -22,6 +22,7 @@ void (*VPMergeHook)(ValueProfData *, __llvm_profile_data *); COMPILER_RT_VISIBILITY uint64_t lprofGetLoadModuleSignature() { /* A very fast way to compute a module signature. */ + uint64_t Version = __llvm_profile_get_version(); uint64_t CounterSize = (uint64_t)(__llvm_profile_end_counters() - __llvm_profile_begin_counters()); uint64_t DataSize = __llvm_profile_get_data_size(__llvm_profile_begin_data(), @@ -33,7 +34,7 @@ uint64_t lprofGetLoadModuleSignature() { const __llvm_profile_data *FirstD = __llvm_profile_begin_data(); return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) + - (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0); + (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version; } /* Returns 1 if profile is not structurally compatible. 
*/ @@ -44,7 +45,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData, __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData; __llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData; SrcDataStart = - (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header)); + (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) + + Header->BinaryIdsSize); SrcDataEnd = SrcDataStart + Header->DataSize; if (ProfileSize < sizeof(__llvm_profile_header)) @@ -63,7 +65,7 @@ int __llvm_profile_check_compatibility(const char *ProfileData, Header->ValueKindLast != IPVK_Last) return 1; - if (ProfileSize < sizeof(__llvm_profile_header) + + if (ProfileSize < sizeof(__llvm_profile_header) + Header->BinaryIdsSize + Header->DataSize * sizeof(__llvm_profile_data) + Header->NamesSize + Header->CountersSize) return 1; @@ -91,7 +93,8 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, const char *SrcValueProfDataStart, *SrcValueProfData; SrcDataStart = - (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header)); + (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) + + Header->BinaryIdsSize); SrcDataEnd = SrcDataStart + Header->DataSize; SrcCountersStart = (uint64_t *)SrcDataEnd; SrcNameStart = (const char *)(SrcCountersStart + Header->CountersSize); diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index 508624a80cd6..7c15f97aff89 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -17,6 +17,15 @@ #include "InstrProfiling.h" #include "InstrProfilingInternal.h" +#if defined(__FreeBSD__) && !defined(ElfW) +/* + * FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet. + * If this is added to all supported FreeBSD versions in the future, this + * compatibility macro can be removed. 
+ */ +#define ElfW(type) __ElfN(type) +#endif + #define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON) #define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON) #define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON) @@ -76,6 +85,7 @@ COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) { COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START; COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP; +#ifdef NT_GNU_BUILD_ID static size_t RoundUp(size_t size, size_t align) { return (size + align - 1) & ~(align - 1); } @@ -179,5 +189,14 @@ COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { return 0; } +#else /* !NT_GNU_BUILD_ID */ +/* + * Fallback implementation for targets that don't support the GNU + * extensions NT_GNU_BUILD_ID and __ehdr_start. + */ +COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { + return 0; +} +#endif #endif diff --git a/libcxx/include/__config b/libcxx/include/__config index 3cf23694f878..97e33f3157aa 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -354,6 +354,16 @@ # define _LIBCPP_NO_CFI #endif +// If the compiler supports using_if_exists, pretend we have those functions and they'll +// be picked up if the C library provides them. +// +// TODO: Once we drop support for Clang 12, we can assume the compiler supports using_if_exists +// for platforms that don't have a conforming C11 library, so we can drop this whole thing. 
+#if __has_attribute(using_if_exists) +# define _LIBCPP_HAS_TIMESPEC_GET +# define _LIBCPP_HAS_QUICK_EXIT +# define _LIBCPP_HAS_ALIGNED_ALLOC +#else #if (defined(__ISO_C_VISIBLE) && (__ISO_C_VISIBLE >= 2011)) || __cplusplus >= 201103L # if defined(__FreeBSD__) # define _LIBCPP_HAS_ALIGNED_ALLOC @@ -408,6 +418,7 @@ # endif # endif // __APPLE__ #endif +#endif // __has_attribute(using_if_exists) #ifndef _LIBCPP_CXX03_LANG # define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp) diff --git a/libcxx/include/ctime b/libcxx/include/ctime index 8b2efd7449ca..2a3fdd12e874 100644 --- a/libcxx/include/ctime +++ b/libcxx/include/ctime @@ -59,7 +59,7 @@ int timespec_get( struct timespec *ts, int base); // C++17 // we're detecting this here instead of in <__config> because we can't include // system headers from <__config>, since it leads to circular module dependencies. // This is also meant to be a very temporary workaround until the SDKs are fixed. -#if defined(__APPLE__) +#if defined(__APPLE__) && !__has_attribute(using_if_exists) # include <sys/cdefs.h> # if defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL) # define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED diff --git a/libcxx/include/ios b/libcxx/include/ios index 3128bca89999..c9230d6a9484 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -607,8 +607,15 @@ public: static_assert((is_same<_CharT, typename traits_type::char_type>::value), "traits_type::char_type must be the same type as CharT"); +#ifdef _LIBCPP_CXX03_LANG + // Preserve the ability to compare with literal 0, + // and implicitly convert to bool, but not implicitly convert to int. + _LIBCPP_INLINE_VISIBILITY + operator void*() const {return fail() ? 
nullptr : (void*)this;} +#else _LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return !fail();} +#endif _LIBCPP_INLINE_VISIBILITY bool operator!() const {return fail();} _LIBCPP_INLINE_VISIBILITY iostate rdstate() const {return ios_base::rdstate();} diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index a996a815599a..e1abb4dfab36 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -86,7 +86,8 @@ struct SymbolVersion { struct VersionDefinition { llvm::StringRef name; uint16_t id; - std::vector<SymbolVersion> patterns; + std::vector<SymbolVersion> nonLocalPatterns; + std::vector<SymbolVersion> localPatterns; }; // This struct contains the global configuration for the linker. diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 91e7df21a60a..594c20016827 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1351,18 +1351,19 @@ static void readConfigs(opt::InputArgList &args) { } assert(config->versionDefinitions.empty()); - config->versionDefinitions.push_back({"local", (uint16_t)VER_NDX_LOCAL, {}}); config->versionDefinitions.push_back( - {"global", (uint16_t)VER_NDX_GLOBAL, {}}); + {"local", (uint16_t)VER_NDX_LOCAL, {}, {}}); + config->versionDefinitions.push_back( + {"global", (uint16_t)VER_NDX_GLOBAL, {}, {}}); // If --retain-symbol-file is used, we'll keep only the symbols listed in // the file and discard all others. 
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) { - config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back( + config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back( {"*", /*isExternCpp=*/false, /*hasWildcard=*/true}); if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue())) for (StringRef s : args::getLines(*buffer)) - config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back( + config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back( {s, /*isExternCpp=*/false, /*hasWildcard=*/false}); } @@ -2069,23 +2070,37 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) { if (suffix1[0] != '@' || suffix1[1] == '@') continue; - // Check whether the default version foo@@v1 exists. If it exists, the - // symbol can be found by the name "foo" in the symbol table. - Symbol *maybeDefault = symtab->find(name); - if (!maybeDefault) - continue; - const char *suffix2 = maybeDefault->getVersionSuffix(); - if (suffix2[0] != '@' || suffix2[1] != '@' || - strcmp(suffix1 + 1, suffix2 + 2) != 0) + // Check the existing symbol foo. We have two special cases to handle: + // + // * There is a definition of foo@v1 and foo@@v1. + // * There is a definition of foo@v1 and foo. + Defined *sym2 = dyn_cast_or_null<Defined>(symtab->find(name)); + if (!sym2) continue; - - // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1. - map.try_emplace(sym, maybeDefault); - // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate - // definition error. - maybeDefault->resolve(*sym); - // Eliminate foo@v1 from the symbol table. - sym->symbolKind = Symbol::PlaceholderKind; + const char *suffix2 = sym2->getVersionSuffix(); + if (suffix2[0] == '@' && suffix2[1] == '@' && + strcmp(suffix1 + 1, suffix2 + 2) == 0) { + // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1. + map.try_emplace(sym, sym2); + // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate + // definition error. 
+ sym2->resolve(*sym); + // Eliminate foo@v1 from the symbol table. + sym->symbolKind = Symbol::PlaceholderKind; + } else if (auto *sym1 = dyn_cast<Defined>(sym)) { + if (sym2->versionId > VER_NDX_GLOBAL + ? config->versionDefinitions[sym2->versionId].name == suffix1 + 1 + : sym1->section == sym2->section && sym1->value == sym2->value) { + // Due to an assembler design flaw, if foo is defined, .symver foo, + // foo@v1 defines both foo and foo@v1. Unless foo is bound to a + // different version, GNU ld makes foo@v1 canonical and elimiates foo. + // Emulate its behavior, otherwise we would have foo or foo@@v1 beside + // foo@v1. foo@v1 and foo combining does not apply if they are not + // defined in the same place. + map.try_emplace(sym2, sym); + sym2->symbolKind = Symbol::PlaceholderKind; + } + } } if (map.empty()) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index a938984ad945..01785f39ed75 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -849,17 +849,8 @@ void LinkerScript::diagnoseOrphanHandling() const { } uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) { - bool isTbss = - (ctx->outSec->flags & SHF_TLS) && ctx->outSec->type == SHT_NOBITS; - uint64_t start = isTbss ? dot + ctx->threadBssOffset : dot; - start = alignTo(start, alignment); - uint64_t end = start + size; - - if (isTbss) - ctx->threadBssOffset = end - dot; - else - dot = end; - return end; + dot = alignTo(dot, alignment) + size; + return dot; } void LinkerScript::output(InputSection *s) { @@ -931,13 +922,24 @@ static OutputSection *findFirstSection(PhdrEntry *load) { // This function assigns offsets to input sections and an output section // for a single sections command (e.g. ".text { *(.text); }"). 
void LinkerScript::assignOffsets(OutputSection *sec) { + const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS; const bool sameMemRegion = ctx->memRegion == sec->memRegion; const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr; const uint64_t savedDot = dot; ctx->memRegion = sec->memRegion; ctx->lmaRegion = sec->lmaRegion; - if (sec->flags & SHF_ALLOC) { + if (!(sec->flags & SHF_ALLOC)) { + // Non-SHF_ALLOC sections have zero addresses. + dot = 0; + } else if (isTbss) { + // Allow consecutive SHF_TLS SHT_NOBITS output sections. The address range + // starts from the end address of the previous tbss section. + if (ctx->tbssAddr == 0) + ctx->tbssAddr = dot; + else + dot = ctx->tbssAddr; + } else { if (ctx->memRegion) dot = ctx->memRegion->curPos; if (sec->addrExpr) @@ -950,9 +952,6 @@ void LinkerScript::assignOffsets(OutputSection *sec) { if (ctx->memRegion && ctx->memRegion->curPos < dot) expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, ctx->memRegion->name, sec->name); - } else { - // Non-SHF_ALLOC sections have zero addresses. - dot = 0; } switchTo(sec); @@ -1008,8 +1007,13 @@ void LinkerScript::assignOffsets(OutputSection *sec) { // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections // as they are not part of the process image. - if (!(sec->flags & SHF_ALLOC)) + if (!(sec->flags & SHF_ALLOC)) { dot = savedDot; + } else if (isTbss) { + // NOBITS TLS sections are similar. Additionally save the end address. + ctx->tbssAddr = dot; + dot = savedDot; + } } static bool isDiscardable(OutputSection &sec) { diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 0592c52acb84..d2487ae0f9d2 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -247,11 +247,11 @@ class LinkerScript final { // not be used outside of the scope of a call to the above functions. 
struct AddressState { AddressState(); - uint64_t threadBssOffset = 0; OutputSection *outSec = nullptr; MemoryRegion *memRegion = nullptr; MemoryRegion *lmaRegion = nullptr; uint64_t lmaOffset = 0; + uint64_t tbssAddr = 0; }; llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e3cc210972b2..537859f9e0b5 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -527,6 +527,13 @@ static SmallSet<SharedSymbol *, 4> getSymbolsAt(SharedSymbol &ss) { if (auto *alias = dyn_cast_or_null<SharedSymbol>(sym)) ret.insert(alias); } + + // The loop does not check SHT_GNU_verneed, so ret does not contain + // non-default version symbols. If ss has a non-default version, ret won't + // contain ss. Just add ss unconditionally. If a non-default version alias is + // separately copy relocated, it and ss will have different addresses. + // Fortunately this case is impractical and fails with GNU ld as well. + ret.insert(&ss); return ret; } diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 2c980eb810c7..1c743fd47747 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -1496,9 +1496,9 @@ void ScriptParser::readAnonymousDeclaration() { std::vector<SymbolVersion> globals; std::tie(locals, globals) = readSymbols(); for (const SymbolVersion &pat : locals) - config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat); + config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat); for (const SymbolVersion &pat : globals) - config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(pat); + config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat); expect(";"); } @@ -1510,13 +1510,12 @@ void ScriptParser::readVersionDeclaration(StringRef verStr) { std::vector<SymbolVersion> locals; std::vector<SymbolVersion> globals; std::tie(locals, globals) = readSymbols(); - for (const SymbolVersion &pat : locals) - 
config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat); // Create a new version definition and add that to the global symbols. VersionDefinition ver; ver.name = verStr; - ver.patterns = globals; + ver.nonLocalPatterns = std::move(globals); + ver.localPatterns = std::move(locals); ver.id = config->versionDefinitions.size(); config->versionDefinitions.push_back(ver); diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 70aea288c53f..22e6b4f92898 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -134,9 +134,20 @@ static bool canBeVersioned(const Symbol &sym) { StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() { if (!demangledSyms) { demangledSyms.emplace(); + std::string demangled; for (Symbol *sym : symVector) - if (canBeVersioned(*sym)) - (*demangledSyms)[demangleItanium(sym->getName())].push_back(sym); + if (canBeVersioned(*sym)) { + StringRef name = sym->getName(); + size_t pos = name.find('@'); + if (pos == std::string::npos) + demangled = demangleItanium(name); + else if (pos + 1 == name.size() || name[pos + 1] == '@') + demangled = demangleItanium(name.substr(0, pos)); + else + demangled = + (demangleItanium(name.substr(0, pos)) + name.substr(pos)).str(); + (*demangledSyms)[demangled].push_back(sym); + } } return *demangledSyms; } @@ -150,19 +161,29 @@ std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) { return {}; } -std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) { +std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver, + bool includeNonDefault) { std::vector<Symbol *> res; SingleStringMatcher m(ver.name); + auto check = [&](StringRef name) { + size_t pos = name.find('@'); + if (!includeNonDefault) + return pos == StringRef::npos; + return !(pos + 1 < name.size() && name[pos + 1] == '@'); + }; if (ver.isExternCpp) { for (auto &p : getDemangledSyms()) if (m.match(p.first())) - res.insert(res.end(), p.second.begin(), p.second.end()); + for 
(Symbol *sym : p.second) + if (check(sym->getName())) + res.push_back(sym); return res; } for (Symbol *sym : symVector) - if (canBeVersioned(*sym) && m.match(sym->getName())) + if (canBeVersioned(*sym) && check(sym->getName()) && + m.match(sym->getName())) res.push_back(sym); return res; } @@ -172,7 +193,7 @@ void SymbolTable::handleDynamicList() { for (SymbolVersion &ver : config->dynamicList) { std::vector<Symbol *> syms; if (ver.hasWildcard) - syms = findAllByVersion(ver); + syms = findAllByVersion(ver, /*includeNonDefault=*/true); else syms = findByVersion(ver); @@ -181,21 +202,13 @@ void SymbolTable::handleDynamicList() { } } -// Set symbol versions to symbols. This function handles patterns -// containing no wildcard characters. -void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, - StringRef versionName) { - if (ver.hasWildcard) - return; - +// Set symbol versions to symbols. This function handles patterns containing no +// wildcard characters. Return false if no symbol definition matches ver. +bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, + StringRef versionName, + bool includeNonDefault) { // Get a list of symbols which we need to assign the version to. std::vector<Symbol *> syms = findByVersion(ver); - if (syms.empty()) { - if (!config->undefinedVersion) - error("version script assignment of '" + versionName + "' to symbol '" + - ver.name + "' failed: symbol not defined"); - return; - } auto getName = [](uint16_t ver) -> std::string { if (ver == VER_NDX_LOCAL) @@ -207,10 +220,11 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, // Assign the version. for (Symbol *sym : syms) { - // Skip symbols containing version info because symbol versions - // specified by symbol names take precedence over version scripts. - // See parseSymbolVersion(). 
- if (sym->getName().contains('@')) + // For a non-local versionId, skip symbols containing version info because + // symbol versions specified by symbol names take precedence over version + // scripts. See parseSymbolVersion(). + if (!includeNonDefault && versionId != VER_NDX_LOCAL && + sym->getName().contains('@')) continue; // If the version has not been assigned, verdefIndex is -1. Use an arbitrary @@ -225,13 +239,15 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, warn("attempt to reassign symbol '" + ver.name + "' of " + getName(sym->versionId) + " to " + getName(versionId)); } + return !syms.empty(); } -void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) { +void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId, + bool includeNonDefault) { // Exact matching takes precedence over fuzzy matching, // so we set a version to a symbol only if no version has been assigned // to the symbol. This behavior is compatible with GNU. - for (Symbol *sym : findAllByVersion(ver)) + for (Symbol *sym : findAllByVersion(ver, includeNonDefault)) if (sym->verdefIndex == UINT32_C(-1)) { sym->verdefIndex = 0; sym->versionId = versionId; @@ -244,26 +260,60 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) { // script file, the script does not actually define any symbol version, // but just specifies symbols visibilities. void SymbolTable::scanVersionScript() { + SmallString<128> buf; // First, we assign versions to exact matching symbols, // i.e. version definitions not containing any glob meta-characters. 
- for (VersionDefinition &v : config->versionDefinitions) - for (SymbolVersion &pat : v.patterns) - assignExactVersion(pat, v.id, v.name); + std::vector<Symbol *> syms; + for (VersionDefinition &v : config->versionDefinitions) { + auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) { + bool found = + assignExactVersion(pat, id, ver, /*includeNonDefault=*/false); + buf.clear(); + found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf), + pat.isExternCpp, /*hasWildCard=*/false}, + id, ver, /*includeNonDefault=*/true); + if (!found && !config->undefinedVersion) + errorOrWarn("version script assignment of '" + ver + "' to symbol '" + + pat.name + "' failed: symbol not defined"); + }; + for (SymbolVersion &pat : v.nonLocalPatterns) + if (!pat.hasWildcard) + assignExact(pat, v.id, v.name); + for (SymbolVersion pat : v.localPatterns) + if (!pat.hasWildcard) + assignExact(pat, VER_NDX_LOCAL, "local"); + } // Next, assign versions to wildcards that are not "*". Note that because the // last match takes precedence over previous matches, we iterate over the // definitions in the reverse order. - for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) - for (SymbolVersion &pat : v.patterns) + auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) { + assignWildcardVersion(pat, id, /*includeNonDefault=*/false); + buf.clear(); + assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf), + pat.isExternCpp, /*hasWildCard=*/true}, + id, + /*includeNonDefault=*/true); + }; + for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) { + for (SymbolVersion &pat : v.nonLocalPatterns) if (pat.hasWildcard && pat.name != "*") - assignWildcardVersion(pat, v.id); + assignWildcard(pat, v.id, v.name); + for (SymbolVersion &pat : v.localPatterns) + if (pat.hasWildcard && pat.name != "*") + assignWildcard(pat, VER_NDX_LOCAL, v.name); + } // Then, assign versions to "*". 
In GNU linkers they have lower priority than // other wildcards. - for (VersionDefinition &v : config->versionDefinitions) - for (SymbolVersion &pat : v.patterns) + for (VersionDefinition &v : config->versionDefinitions) { + for (SymbolVersion &pat : v.nonLocalPatterns) if (pat.hasWildcard && pat.name == "*") - assignWildcardVersion(pat, v.id); + assignWildcard(pat, v.id, v.name); + for (SymbolVersion &pat : v.localPatterns) + if (pat.hasWildcard && pat.name == "*") + assignWildcard(pat, VER_NDX_LOCAL, v.name); + } // Symbol themselves might know their versions because symbols // can contain versions in the form of <name>@<version>. diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 507af8d2be75..54c4b1169ed1 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -65,12 +65,14 @@ public: private: std::vector<Symbol *> findByVersion(SymbolVersion ver); - std::vector<Symbol *> findAllByVersion(SymbolVersion ver); + std::vector<Symbol *> findAllByVersion(SymbolVersion ver, + bool includeNonDefault); llvm::StringMap<std::vector<Symbol *>> &getDemangledSyms(); - void assignExactVersion(SymbolVersion ver, uint16_t versionId, - StringRef versionName); - void assignWildcardVersion(SymbolVersion ver, uint16_t versionId); + bool assignExactVersion(SymbolVersion ver, uint16_t versionId, + StringRef versionName, bool includeNonDefault); + void assignWildcardVersion(SymbolVersion ver, uint16_t versionId, + bool includeNonDefault); // The order the global symbols are in is not defined. We can use an arbitrary // order, but it has to be reproducible. That is true even when cross linking. diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 496be33dd182..cef303f05f89 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -208,6 +208,9 @@ OutputSection *Symbol::getOutputSection() const { // If a symbol name contains '@', the characters after that is // a symbol version name. This function parses that. 
void Symbol::parseSymbolVersion() { + // Return if localized by a local: pattern in a version script. + if (versionId == VER_NDX_LOCAL) + return; StringRef s = getName(); size_t pos = s.find('@'); if (pos == 0 || pos == StringRef::npos) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index a52ee4348f78..50af6e7d7939 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -24,6 +24,13 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- +* ``-z start-stop-gc`` is now supported and becomes the default. + (`D96914 <https://reviews.llvm.org/D96914>`_) + (`rG6d2d3bd0 <https://reviews.llvm.org/rG6d2d3bd0a61f5fc7fd9f61f48bc30e9ca77cc619>`_) +* ``--shuffle-sections=<seed>`` has been changed to ``--shuffle-sections=<section-glob>=<seed>``. + If seed is -1, the matched input sections are reversed. + (`D98445 <https://reviews.llvm.org/D98445>`_) + (`D98679 <https://reviews.llvm.org/D98679>`_) * ``-Bsymbolic -Bsymbolic-functions`` has been changed to behave the same as ``-Bsymbolic-functions``. This matches GNU ld. (`D102461 <https://reviews.llvm.org/D102461>`_) * ``-Bno-symbolic`` has been added. @@ -32,6 +39,75 @@ ELF Improvements (`D103303 <https://reviews.llvm.org/D103303>`_) * ``-Bsymbolic-non-weak-functions`` has been added as a ``STB_GLOBAL`` subset of ``-Bsymbolic-functions``. (`D102570 <https://reviews.llvm.org/D102570>`_) +* ``--no-allow-shlib-undefined`` has been improved to catch more cases. + (`D101996 <https://reviews.llvm.org/D101996>`_) +* ``__rela_iplt_start`` is no longer defined for -pie/-shared. + This makes GCC/Clang ``-static-pie`` built executables work. + (`rG8cb78e99 <https://reviews.llvm.org/rf8cb78e99aae9aa3f89f7bfe667db2c5b767f21f>`_) +* IRELATIVE/TLSDESC relocations now support ``-z rel``. + (`D100544 <https://reviews.llvm.org/D100544>`_) +* Section groups with a zero flag are now supported. + This is used by ``comdat nodeduplicate`` in LLVM IR. 
+ (`D96636 <https://reviews.llvm.org/D96636>`_) + (`D106228 <https://reviews.llvm.org/D106228>`_) +* Defined symbols are now resolved before undefined symbols to stabilize the bheavior of archive member extraction. + (`D95985 <https://reviews.llvm.org/D95985>`_) +* ``STB_WEAK`` symbols are now preferred over COMMON symbols as a fix to a ``--fortran-common`` regression. + (`D105945 <https://reviews.llvm.org/D105945>`_) +* Absolute relocations referencing undef weak now produce dynamic relocations for -pie, matching GOT-generating relocations. + (`D105164 <https://reviews.llvm.org/D105164>`_) +* Exported symbols are now communicated to the LTO library so as to make LTO + based whole program devirtualization (``-flto=thin -fwhole-program-vtables``) + work with shared objects. + (`D91583 <https://reviews.llvm.org/D91583>`_) +* Whole program devirtualization now respects ``local:`` version nodes in a version script. + (`D98220 <https://reviews.llvm.org/D98220>`_) + (`D98686 <https://reviews.llvm.org/D98686>`_) +* ``local:`` version nodes in a version script now apply to non-default version symbols. + (`D107234 <https://reviews.llvm.org/D107234>`_) +* If an object file defines both ``foo`` and ``foo@v1``, now only ``foo@v1`` will be in the output. + (`D107235 <https://reviews.llvm.org/D107235>`_) +* Copy relocations on non-default version symbols are now supported. + (`D107535 <https://reviews.llvm.org/D107535>`_) + +Linker script changes: + +* ``.``, ``$``, and double quotes can now be used in symbol names in expressions. + (`D98306 <https://reviews.llvm.org/D98306>`_) + (`rGe7a7ad13 <https://reviews.llvm.org/rGe7a7ad134fe182aad190cb3ebc441164470e92f5>`_) +* Fixed value of ``.`` in the output section description of ``.tbss``. + (`D107288 <https://reviews.llvm.org/D107288>`_) +* ``NOLOAD`` sections can now be placed in a ``PT_LOAD`` program header. 
+ (`D103815 <https://reviews.llvm.org/D103815>`_) +* ``OUTPUT_FORMAT(default, big, little)`` now consults ``-EL`` and ``-EB``. + (`D96214 <https://reviews.llvm.org/D96214>`_) +* The ``OVERWRITE_SECTIONS`` command has been added. + (`D103303 <https://reviews.llvm.org/D103303>`_) +* The section order within an ``INSERT AFTER`` command is now preserved. + (`D105158 <https://reviews.llvm.org/D105158>`_) + +Architecture specific changes: + +* aarch64_be is now supported. + (`D96188 <https://reviews.llvm.org/D96188>`_) +* The AMDGPU port now supports ``--amdhsa-code-object-version=4`` object files; + (`D95811 <https://reviews.llvm.org/D95811>`_) +* The ARM port now accounts for PC biases in range extension thunk creation. + (`D97550 <https://reviews.llvm.org/D97550>`_) +* The AVR port now computes ``e_flags``. + (`D99754 <https://reviews.llvm.org/D99754>`_) +* The Mips port now omits unneeded dynamic relocations for PIE non-preemptible TLS. + (`D101382 <https://reviews.llvm.org/D101382>`_) +* The PowerPC port now supports ``--power10-stubs=no`` to omit Power10 instructions from call stubs. + (`D94625 <https://reviews.llvm.org/D94625>`_) +* Fixed a thunk creation bug in the PowerPC port when TOC/NOTOC calls are mixed. + (`D101837 <https://reviews.llvm.org/D101837>`_) +* The RISC-V port now resolves undefined weak relocations to the current location if not using PLT. + (`D103001 <https://reviews.llvm.org/D103001>`_) +* ``R_386_GOTOFF`` relocations from .debug_info are now allowed to be compatible with GCC. + (`D95994 <https://reviews.llvm.org/D95994>`_) +* ``gotEntrySize`` has been added to improve support for the ILP32 ABI of x86-64. + (`D102569 <https://reviews.llvm.org/D102569>`_) Breaking changes ---------------- @@ -42,17 +118,75 @@ Breaking changes COFF Improvements ----------------- -* ... - -MinGW Improvements ------------------- +* Avoid thread exhaustion when running on 32 bit Windows. + (`D105506 <https://reviews.llvm.org/D105506>`_) -* ... 
+* Improve terminating the process on Windows while a thread pool might be + running. (`D102944 <https://reviews.llvm.org/D102944>`_) -MachO Improvements +MinGW Improvements ------------------ -* Item 1. +* Support for linking directly against a DLL without using an import library + has been added. (`D104530 <https://reviews.llvm.org/D104530>`_ and + `D104531 <https://reviews.llvm.org/D104531>`_) + +* Fix linking with ``--export-all-symbols`` in combination with + ``-function-sections``. (`D101522 <https://reviews.llvm.org/D101522>`_ and + `D101615 <https://reviews.llvm.org/D101615>`_) + +* Fix automatic export of symbols from LTO objects. + (`D101569 <https://reviews.llvm.org/D101569>`_) + +* Accept more spellings of some options. + (`D107237 <https://reviews.llvm.org/D107237>`_ and + `D107253 <https://reviews.llvm.org/D107253>`_) + +Mach-O Improvements +------------------- + +The Mach-O backend is now able to link several large, real-world programs, +though we are still working out the kinks. + +* arm64 is now supported as a target. (`D88629 <https://reviews.llvm.org/D88629>`_) +* arm64_32 is now supported as a target. (`D99822 <https://reviews.llvm.org/D99822>`_) +* Branch-range-extension thunks are now supported. (`D100818 <https://reviews.llvm.org/D100818>`_) +* ``-dead_strip`` is now supported. (`D103324 <https://reviews.llvm.org/D103324>`_) +* Support for identical code folding (``--icf=all``) has been added. + (`D103292 <https://reviews.llvm.org/D103292>`_) +* Support for special ``$start`` and ``$end`` symbols for segment & sections has been + added. (`D106767 <https://reviews.llvm.org/D106767>`_, `D106629 <https://reviews.llvm.org/D106629>`_) +* ``$ld$previous`` symbols are now supported. (`D103505 <https://reviews.llvm.org/D103505 >`_) +* ``$ld$install_name`` symbols are now supported. (`D103746 <https://reviews.llvm.org/D103746>`_) +* ``__mh_*_header`` symbols are now supported. 
(`D97007 <https://reviews.llvm.org/D97007>`_) +* LC_CODE_SIGNATURE is now supported. (`D96164 <https://reviews.llvm.org/D96164>`_) +* LC_FUNCTION_STARTS is now supported. (`D97260 <https://reviews.llvm.org/D97260>`_) +* LC_DATA_IN_CODE is now supported. (`D103006 <https://reviews.llvm.org/D103006>`_) +* Bind opcodes are more compactly encoded. (`D106128 <https://reviews.llvm.org/D106128>`_, + `D105075 <https://reviews.llvm.org/D105075>`_) +* LTO cache support has been added. (`D105922 <https://reviews.llvm.org/D105922>`_) +* ``-application_extension`` is now supported. (`D105818 <https://reviews.llvm.org/D105818>`_) +* ``-export_dynamic`` is now partially supported. (`D105482 <https://reviews.llvm.org/D105482>`_) +* ``-arch_multiple`` is now supported. (`D105450 <https://reviews.llvm.org/D105450>`_) +* ``-final_output`` is now supported. (`D105449 <https://reviews.llvm.org/D105449>`_) +* ``-umbrella`` is now supported. (`D105448 <https://reviews.llvm.org/D105448>`_) +* ``--print-dylib-search`` is now supported. (`D103985 <https://reviews.llvm.org/D103985>`_) +* ``-force_load_swift_libs`` is now supported. (`D103709 <https://reviews.llvm.org/D103709>`_) +* ``-reexport_framework``, ``-reexport_library``, ``-reexport-l`` are now supported. + (`D103497 <https://reviews.llvm.org/D103497>`_) +* ``.weak_def_can_be_hidden`` is now supported. (`D101080 <https://reviews.llvm.org/D101080>`_) +* ``-add_ast_path`` is now supported. (`D100076 <https://reviews.llvm.org/D100076>`_) +* ``-segprot`` is now supported. (`D99389 <https://reviews.llvm.org/D99389>`_) +* ``-dependency_info`` is now partially supported. (`D98559 <https://reviews.llvm.org/D98559>`_) +* ``--time-trace`` is now supported. (`D98419 <https://reviews.llvm.org/D98419>`_) +* ``-mark_dead_strippable_dylib`` is now supported. (`D98262 <https://reviews.llvm.org/D98262>`_) +* ``-[un]exported_symbol[s_list]`` is now supported. (`D98223 <https://reviews.llvm.org/D98223>`_) +* ``-flat_namespace`` is now supported. 
(`D97641 <https://reviews.llvm.org/D97641>`_) +* ``-rename_section`` and ``-rename_segment`` are now supported. (`D97600 <https://reviews.llvm.org/D97600>`_) +* ``-bundle_loader`` is now supported. (`D95913 <https://reviews.llvm.org/D95913>`_) +* ``-map`` is now partially supported. (`D98323 <https://reviews.llvm.org/D98323>`_) + +There were numerous other bug-fixes as well. WebAssembly Improvements ------------------------ diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp b/lldb/source/Commands/CommandObjectMemoryTag.cpp index 1dfb32a92f3b..840f81719d7d 100644 --- a/lldb/source/Commands/CommandObjectMemoryTag.cpp +++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp @@ -7,8 +7,11 @@ //===----------------------------------------------------------------------===// #include "CommandObjectMemoryTag.h" +#include "lldb/Host/OptionParser.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Interpreter/OptionArgParser.h" +#include "lldb/Interpreter/OptionGroupFormat.h" +#include "lldb/Interpreter/OptionValueString.h" #include "lldb/Target/Process.h" using namespace lldb; @@ -21,7 +24,8 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed { public: CommandObjectMemoryTagRead(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "tag", - "Read memory tags for the given range of memory.", + "Read memory tags for the given range of memory." 
+ " Mismatched tags will be marked.", nullptr, eCommandRequiresTarget | eCommandRequiresProcess | eCommandProcessMustBePaused) { @@ -97,16 +101,17 @@ protected: return false; } - result.AppendMessageWithFormatv("Logical tag: {0:x}", - tag_manager->GetLogicalTag(start_addr)); + lldb::addr_t logical_tag = tag_manager->GetLogicalTag(start_addr); + result.AppendMessageWithFormatv("Logical tag: {0:x}", logical_tag); result.AppendMessage("Allocation tags:"); addr_t addr = tagged_range->GetRangeBase(); for (auto tag : *tags) { addr_t next_addr = addr + tag_manager->GetGranuleSize(); // Showing tagged adresses here until we have non address bit handling - result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}", addr, next_addr, - tag); + result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}{3}", addr, + next_addr, tag, + logical_tag == tag ? "" : " (mismatch)"); addr = next_addr; } @@ -115,6 +120,168 @@ protected: } }; +#define LLDB_OPTIONS_memory_tag_write +#include "CommandOptions.inc" + +class CommandObjectMemoryTagWrite : public CommandObjectParsed { +public: + class OptionGroupTagWrite : public OptionGroup { + public: + OptionGroupTagWrite() : OptionGroup(), m_end_addr(LLDB_INVALID_ADDRESS) {} + + ~OptionGroupTagWrite() override = default; + + llvm::ArrayRef<OptionDefinition> GetDefinitions() override { + return llvm::makeArrayRef(g_memory_tag_write_options); + } + + Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_value, + ExecutionContext *execution_context) override { + Status status; + const int short_option = + g_memory_tag_write_options[option_idx].short_option; + + switch (short_option) { + case 'e': + m_end_addr = OptionArgParser::ToAddress(execution_context, option_value, + LLDB_INVALID_ADDRESS, &status); + break; + default: + llvm_unreachable("Unimplemented option"); + } + + return status; + } + + void OptionParsingStarting(ExecutionContext *execution_context) override { + m_end_addr = LLDB_INVALID_ADDRESS; + } + + lldb::addr_t m_end_addr; 
+ }; + + CommandObjectMemoryTagWrite(CommandInterpreter &interpreter) + : CommandObjectParsed(interpreter, "tag", + "Write memory tags starting from the granule that " + "contains the given address.", + nullptr, + eCommandRequiresTarget | eCommandRequiresProcess | + eCommandProcessMustBePaused), + m_option_group(), m_tag_write_options() { + // Address + m_arguments.push_back( + CommandArgumentEntry{CommandArgumentData(eArgTypeAddressOrExpression)}); + // One or more tag values + m_arguments.push_back(CommandArgumentEntry{ + CommandArgumentData(eArgTypeValue, eArgRepeatPlus)}); + + m_option_group.Append(&m_tag_write_options); + m_option_group.Finalize(); + } + + ~CommandObjectMemoryTagWrite() override = default; + + Options *GetOptions() override { return &m_option_group; } + +protected: + bool DoExecute(Args &command, CommandReturnObject &result) override { + if (command.GetArgumentCount() < 2) { + result.AppendError("wrong number of arguments; expected " + "<address-expression> <tag> [<tag> [...]]"); + return false; + } + + Status error; + addr_t start_addr = OptionArgParser::ToAddress( + &m_exe_ctx, command[0].ref(), LLDB_INVALID_ADDRESS, &error); + if (start_addr == LLDB_INVALID_ADDRESS) { + result.AppendErrorWithFormatv("Invalid address expression, {0}", + error.AsCString()); + return false; + } + + command.Shift(); // shift off start address + + std::vector<lldb::addr_t> tags; + for (auto &entry : command) { + lldb::addr_t tag_value; + // getAsInteger returns true on failure + if (entry.ref().getAsInteger(0, tag_value)) { + result.AppendErrorWithFormat( + "'%s' is not a valid unsigned decimal string value.\n", + entry.c_str()); + return false; + } + tags.push_back(tag_value); + } + + Process *process = m_exe_ctx.GetProcessPtr(); + llvm::Expected<const MemoryTagManager *> tag_manager_or_err = + process->GetMemoryTagManager(); + + if (!tag_manager_or_err) { + result.SetError(Status(tag_manager_or_err.takeError())); + return false; + } + + const MemoryTagManager 
*tag_manager = *tag_manager_or_err; + + MemoryRegionInfos memory_regions; + // If this fails the list of regions is cleared, so we don't need to read + // the return status here. + process->GetMemoryRegions(memory_regions); + + // We have to assume start_addr is not granule aligned. + // So if we simply made a range: + // (start_addr, start_addr + (N * granule_size)) + // We would end up with a range that isn't N granules but N+1 + // granules. To avoid this we'll align the start first using the method that + // doesn't check memory attributes. (if the final range is untagged we'll + // handle that error later) + lldb::addr_t aligned_start_addr = + tag_manager->ExpandToGranule(MemoryTagManager::TagRange(start_addr, 1)) + .GetRangeBase(); + + lldb::addr_t end_addr = 0; + // When you have an end address you want to align the range like tag read + // does. Meaning, align the start down (which we've done) and align the end + // up. + if (m_tag_write_options.m_end_addr != LLDB_INVALID_ADDRESS) + end_addr = m_tag_write_options.m_end_addr; + else + // Without an end address assume number of tags matches number of granules + // to write to + end_addr = + aligned_start_addr + (tags.size() * tag_manager->GetGranuleSize()); + + // Now we've aligned the start address so if we ask for another range + // using the number of tags N, we'll get back a range that is also N + // granules in size. 
+ llvm::Expected<MemoryTagManager::TagRange> tagged_range = + tag_manager->MakeTaggedRange(aligned_start_addr, end_addr, + memory_regions); + + if (!tagged_range) { + result.SetError(Status(tagged_range.takeError())); + return false; + } + + Status status = process->WriteMemoryTags(tagged_range->GetRangeBase(), + tagged_range->GetByteSize(), tags); + + if (status.Fail()) { + result.SetError(status); + return false; + } + + result.SetStatus(eReturnStatusSuccessFinishResult); + return true; + } + + OptionGroupOptions m_option_group; + OptionGroupTagWrite m_tag_write_options; +}; + CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter) : CommandObjectMultiword( interpreter, "tag", "Commands for manipulating memory tags", @@ -123,6 +290,11 @@ CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter) new CommandObjectMemoryTagRead(interpreter)); read_command_object->SetCommandName("memory tag read"); LoadSubCommand("read", read_command_object); + + CommandObjectSP write_command_object( + new CommandObjectMemoryTagWrite(interpreter)); + write_command_object->SetCommandName("memory tag write"); + LoadSubCommand("write", write_command_object); } CommandObjectMemoryTag::~CommandObjectMemoryTag() = default; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 9c9b7c6e9b82..6abb4788bed0 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -504,6 +504,14 @@ let Command = "memory write" in { Desc<"Start writing bytes from an offset within the input file.">; } +let Command = "memory tag write" in { + def memory_write_end_addr : Option<"end-addr", "e">, Group<1>, + Arg<"AddressOrExpression">, Desc< + "Set tags for start address to end-addr, repeating tags as needed" + " to cover the range. 
(instead of calculating the range from the" + " number of tags given)">; +} + let Command = "register read" in { def register_read_alternate : Option<"alternate", "A">, Desc<"Display register names using the alternate register name if there " diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 5e69b5793f9f..8e1f6bc29a6f 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -3474,15 +3474,31 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemTags( if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':') return SendIllFormedResponse(packet, invalid_type_err); - int32_t type = - packet.GetS32(std::numeric_limits<int32_t>::max(), /*base=*/16); - if (type == std::numeric_limits<int32_t>::max() || + // Type is a signed integer but packed into the packet as its raw bytes. + // However, our GetU64 uses strtoull which allows +/-. We do not want this. + const char *first_type_char = packet.Peek(); + if (first_type_char && (*first_type_char == '+' || *first_type_char == '-')) + return SendIllFormedResponse(packet, invalid_type_err); + + // Extract type as unsigned then cast to signed. + // Using a uint64_t here so that we have some value outside of the 32 bit + // range to use as the invalid return value. + uint64_t raw_type = + packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16); + + if ( // Make sure the cast below would be valid + raw_type > std::numeric_limits<uint32_t>::max() || // To catch inputs like "123aardvark" that will parse but clearly aren't // valid in this case. packet.GetBytesLeft()) { return SendIllFormedResponse(packet, invalid_type_err); } + // First narrow to 32 bits otherwise the copy into type would take + // the wrong 4 bytes on big endian. 
+ uint32_t raw_type_32 = raw_type; + int32_t type = reinterpret_cast<int32_t &>(raw_type_32); + StreamGDBRemote response; std::vector<uint8_t> tags; Status error = m_current_process->ReadMemoryTags(type, addr, length, tags); @@ -3552,7 +3568,11 @@ GDBRemoteCommunicationServerLLGS::Handle_QMemTags( packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16); if (raw_type > std::numeric_limits<uint32_t>::max()) return SendIllFormedResponse(packet, invalid_type_err); - int32_t type = static_cast<int32_t>(raw_type); + + // First narrow to 32 bits. Otherwise the copy below would get the wrong + // 4 bytes on big endian. + uint32_t raw_type_32 = raw_type; + int32_t type = reinterpret_cast<int32_t &>(raw_type_32); // Tag data if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':') diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index 252b06e269d6..0b3f7e4f3bd4 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -223,62 +223,32 @@ void TypeSystemMap::ForEach(std::function<bool(TypeSystem *)> const &callback) { llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage( lldb::LanguageType language, llvm::Optional<CreateCallback> create_callback) { - llvm::Error error = llvm::Error::success(); - assert(!error); // Check the success value when assertions are enabled std::lock_guard<std::mutex> guard(m_mutex); - if (m_clear_in_progress) { - error = llvm::make_error<llvm::StringError>( + if (m_clear_in_progress) + return llvm::make_error<llvm::StringError>( "Unable to get TypeSystem because TypeSystemMap is being cleared", llvm::inconvertibleErrorCode()); - } else { - collection::iterator pos = m_map.find(language); - if (pos != m_map.end()) { - auto *type_system = pos->second.get(); - if (type_system) { - llvm::consumeError(std::move(error)); - return *type_system; - } - error = llvm::make_error<llvm::StringError>( - "TypeSystem for language " + - 
llvm::StringRef(Language::GetNameForLanguageType(language)) + - " doesn't exist", - llvm::inconvertibleErrorCode()); - return std::move(error); - } - for (const auto &pair : m_map) { - if (pair.second && pair.second->SupportsLanguage(language)) { - // Add a new mapping for "language" to point to an already existing - // TypeSystem that supports this language - m_map[language] = pair.second; - if (pair.second.get()) { - llvm::consumeError(std::move(error)); - return *pair.second.get(); - } - error = llvm::make_error<llvm::StringError>( - "TypeSystem for language " + - llvm::StringRef(Language::GetNameForLanguageType(language)) + - " doesn't exist", - llvm::inconvertibleErrorCode()); - return std::move(error); - } - } + collection::iterator pos = m_map.find(language); + if (pos != m_map.end()) { + auto *type_system = pos->second.get(); + if (type_system) + return *type_system; + return llvm::make_error<llvm::StringError>( + "TypeSystem for language " + + llvm::StringRef(Language::GetNameForLanguageType(language)) + + " doesn't exist", + llvm::inconvertibleErrorCode()); + } - if (!create_callback) { - error = llvm::make_error<llvm::StringError>( - "Unable to find type system for language " + - llvm::StringRef(Language::GetNameForLanguageType(language)), - llvm::inconvertibleErrorCode()); - } else { - // Cache even if we get a shared pointer that contains a null type system - // back - TypeSystemSP type_system_sp = (*create_callback)(); - m_map[language] = type_system_sp; - if (type_system_sp.get()) { - llvm::consumeError(std::move(error)); - return *type_system_sp.get(); - } - error = llvm::make_error<llvm::StringError>( + for (const auto &pair : m_map) { + if (pair.second && pair.second->SupportsLanguage(language)) { + // Add a new mapping for "language" to point to an already existing + // TypeSystem that supports this language + m_map[language] = pair.second; + if (pair.second.get()) + return *pair.second.get(); + return llvm::make_error<llvm::StringError>( 
"TypeSystem for language " + llvm::StringRef(Language::GetNameForLanguageType(language)) + " doesn't exist", @@ -286,7 +256,23 @@ llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage( } } - return std::move(error); + if (!create_callback) + return llvm::make_error<llvm::StringError>( + "Unable to find type system for language " + + llvm::StringRef(Language::GetNameForLanguageType(language)), + llvm::inconvertibleErrorCode()); + + // Cache even if we get a shared pointer that contains a null type system + // back + TypeSystemSP type_system_sp = (*create_callback)(); + m_map[language] = type_system_sp; + if (type_system_sp.get()) + return *type_system_sp.get(); + return llvm::make_error<llvm::StringError>( + "TypeSystem for language " + + llvm::StringRef(Language::GetNameForLanguageType(language)) + + " doesn't exist", + llvm::inconvertibleErrorCode()); } llvm::Expected<TypeSystem &> diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 90ec742f18e6..f46e66641c08 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -744,6 +744,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// minimum/maximum flavor. CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); + /// Return the minimum or maximum constant value for the specified integer + /// min/max flavor and type. + APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth); + /// Check if the values in \p VL are select instructions that can be converted /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a /// conversion is possible, together with a bool indicating whether all select diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 81e29d9b86e8..97aea5aedf22 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -324,6 +324,9 @@ public: /// name is not found. 
GlobalValue *getNamedValue(StringRef Name) const; + /// Return the number of global values in the module. + unsigned getNumNamedValues() const; + /// Return a unique non-zero ID for the specified metadata kind. This ID is /// uniqued across modules in the current LLVMContext. unsigned getMDKindID(StringRef Name) const; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 08a934e6985f..c0cedb23bdcf 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1104,6 +1104,7 @@ namespace RawInstrProf { // Version 5: Bit 60 of FuncHash is reserved for the flag for the context // sensitive records. // Version 6: Added binary id. +// Version 7: Reorder binary id and include version in signature. const uint64_t Version = INSTR_PROF_RAW_VERSION; template <class IntPtrT> inline uint64_t getMagic(); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 08a642469627..7d2097cfc297 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) @@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) -INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, 
__llvm_write_binary_ids(NULL)) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 6 +#define INSTR_PROF_RAW_VERSION 7 /* Indexed profile format version (start from 1). */ #define INSTR_PROF_INDEX_VERSION 7 /* Coverage mapping format version (start from 0). */ diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index c93b8adcc890..c3c12fd23746 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1855,6 +1855,10 @@ public: /// static void createShallowWrapper(Function &F); + /// Returns true if the function \p F can be internalized. i.e. it has a + /// compatible linkage. + static bool isInternalizable(Function &F); + /// Make another copy of the function \p F such that the copied version has /// internal linkage afterwards and can be analysed. Then we replace all uses /// of the original function to the copied one @@ -1870,6 +1874,22 @@ public: /// null pointer. static Function *internalizeFunction(Function &F, bool Force = false); + /// Make copies of each function in the set \p FnSet such that the copied + /// version has internal linkage afterwards and can be analysed. Then we + /// replace all uses of the original function to the copied one. The map + /// \p FnMap contains a mapping of functions to their internalized versions. + /// + /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` + /// linkage can be internalized because these linkages guarantee that other + /// definitions with the same name have the same semantics as this one. + /// + /// This version will internalize all the functions in the set \p FnSet at + /// once and then replace the uses. 
This prevents internalized functions being + /// called by external functions when there is an internalized version in the + /// module. + static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, + DenseMap<Function *, Function *> &FnMap); + /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index c4030735d965..c922476ac79d 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -51,11 +51,13 @@ #define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" namespace llvm { @@ -176,7 +178,7 @@ public: class PredicateInfo { public: PredicateInfo(Function &, DominatorTree &, AssumptionCache &); - ~PredicateInfo() = default; + ~PredicateInfo(); void verifyPredicateInfo() const; @@ -203,6 +205,8 @@ private: // the Predicate Info, they belong to the ValueInfo structs in the ValueInfos // vector. DenseMap<const Value *, const PredicateBase *> PredicateMap; + // The set of ssa_copy declarations we created with our custom mangling. + SmallSet<AssertingVH<Function>, 20> CreatedDeclarations; }; // This pass does eager building and then printing of PredicateInfo. 
It is used diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 8662dbf385dc..59bf3a342caa 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -83,6 +83,9 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> { /// InsertedValues/InsertedPostIncValues. SmallPtrSet<Value *, 16> ReusedValues; + // The induction variables generated. + SmallVector<WeakVH, 2> InsertedIVs; + /// A memoization of the "relevant" loop for a given SCEV. DenseMap<const SCEV *, const Loop *> RelevantLoops; @@ -199,9 +202,11 @@ public: InsertedPostIncValues.clear(); ReusedValues.clear(); ChainedPhis.clear(); + InsertedIVs.clear(); } ScalarEvolution *getSE() { return &SE; } + const SmallVectorImpl<WeakVH> &getInsertedIVs() const { return InsertedIVs; } /// Return a vector containing all instructions inserted during expansion. SmallVector<Instruction *, 32> getAllInsertedInstructions() const { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 23083bc8178e..69ab0052b0a7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4080,6 +4080,22 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, std::swap(TrueVal, FalseVal); } + // Check for integer min/max with a limit constant: + // X > MIN_INT ? X : MIN_INT --> X + // X < MAX_INT ? 
X : MAX_INT --> X + if (TrueVal->getType()->isIntOrIntVectorTy()) { + Value *X, *Y; + SelectPatternFlavor SPF = + matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal, + X, Y).Flavor; + if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) { + APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF), + X->getType()->getScalarSizeInBits()); + if (match(Y, m_SpecificInt(LimitC))) + return X; + } + } + if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) { Value *X; const APInt *Y; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 522d21812c6a..6e3ca5c4e08a 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6253,6 +6253,16 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) { return getMinMaxPred(getInverseMinMaxFlavor(SPF)); } +APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) { + switch (SPF) { + case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth); + case SPF_SMIN: return APInt::getSignedMinValue(BitWidth); + case SPF_UMAX: return APInt::getMaxValue(BitWidth); + case SPF_UMIN: return APInt::getMinValue(BitWidth); + default: llvm_unreachable("Unexpected flavor"); + } +} + std::pair<Intrinsic::ID, bool> llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) { // Check if VL contains select instructions that can be folded into a min/max diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1bba7232eb14..4f730b2cf372 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20560,8 +20560,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // otherwise => (extract_subvec V1, ExtIdx) uint64_t InsIdx = V.getConstantOperandVal(2); if (InsIdx * SmallVT.getScalarSizeInBits() == - ExtIdx * NVT.getScalarSizeInBits()) + ExtIdx * NVT.getScalarSizeInBits()) { + if 
(LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) + return SDValue(); + return DAG.getBitcast(NVT, V.getOperand(1)); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index add34eccc1f3..de096f95afcb 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, } if (Retain) { - if (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) + if ((Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) Flags |= ELF::SHF_GNU_RETAIN; return NextUniqueID++; } @@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal( EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } - if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) { + if (Retain && + (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) { EmitUniqueSection = true; Flags |= ELF::SHF_GNU_RETAIN; } diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 5f05aa2e94e7..e1e28d1230b0 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -349,200 +349,6 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, } } -/// Wrapper around getFoldedSizeOfImpl() that adds caching. -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded, - DenseMap<Type *, Constant *> &Cache); - -/// Return a ConstantExpr with type DestTy for sizeof on Ty, with any known -/// factors factored out. 
If Folded is false, return null if no factoring was -/// possible, to avoid endlessly bouncing an unfoldable expression back into the -/// top-level folder. -static Constant *getFoldedSizeOfImpl(Type *Ty, Type *DestTy, bool Folded, - DenseMap<Type *, Constant *> &Cache) { - // This is the actual implementation of getFoldedSizeOf(). To get the caching - // behavior, we need to call getFoldedSizeOf() when we recurse. - - if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); - Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true, Cache); - return ConstantExpr::getNUWMul(E, N); - } - - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!STy->isPacked()) { - unsigned NumElems = STy->getNumElements(); - // An empty struct has size zero. - if (NumElems == 0) - return ConstantExpr::getNullValue(DestTy); - // Check for a struct with all members having the same size. - Constant *MemberSize = - getFoldedSizeOf(STy->getElementType(0), DestTy, true, Cache); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberSize != - getFoldedSizeOf(STy->getElementType(i), DestTy, true, Cache)) { - AllSame = false; - break; - } - if (AllSame) { - Constant *N = ConstantInt::get(DestTy, NumElems); - return ConstantExpr::getNUWMul(MemberSize, N); - } - } - - // Pointer size doesn't depend on the pointee type, so canonicalize them - // to an arbitrary pointee. - if (PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isIntegerTy(1)) - return getFoldedSizeOf( - PointerType::get(IntegerType::get(PTy->getContext(), 1), - PTy->getAddressSpace()), - DestTy, true, Cache); - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular sizeof expression. 
- Constant *C = ConstantExpr::getSizeOf(Ty); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded, - DenseMap<Type *, Constant *> &Cache) { - // Check for previously generated folded size constant. - auto It = Cache.find(Ty); - if (It != Cache.end()) - return It->second; - return Cache[Ty] = getFoldedSizeOfImpl(Ty, DestTy, Folded, Cache); -} - -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) { - DenseMap<Type *, Constant *> Cache; - return getFoldedSizeOf(Ty, DestTy, Folded, Cache); -} - -/// Return a ConstantExpr with type DestTy for alignof on Ty, with any known -/// factors factored out. If Folded is false, return null if no factoring was -/// possible, to avoid endlessly bouncing an unfoldable expression back into the -/// top-level folder. -static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) { - // The alignment of an array is equal to the alignment of the - // array element. Note that this is not always true for vectors. - if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - Constant *C = ConstantExpr::getAlignOf(ATy->getElementType()); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, - false), - C, DestTy); - return C; - } - - if (StructType *STy = dyn_cast<StructType>(Ty)) { - // Packed structs always have an alignment of 1. - if (STy->isPacked()) - return ConstantInt::get(DestTy, 1); - - // Otherwise, struct alignment is the maximum alignment of any member. - // Without target data, we can't compare much, but we can check to see - // if all the members have the same alignment. - unsigned NumElems = STy->getNumElements(); - // An empty struct has minimal alignment. - if (NumElems == 0) - return ConstantInt::get(DestTy, 1); - // Check for a struct with all members having the same alignment. 
- Constant *MemberAlign = - getFoldedAlignOf(STy->getElementType(0), DestTy, true); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) { - AllSame = false; - break; - } - if (AllSame) - return MemberAlign; - } - - // Pointer alignment doesn't depend on the pointee type, so canonicalize them - // to an arbitrary pointee. - if (PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isIntegerTy(1)) - return - getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), - 1), - PTy->getAddressSpace()), - DestTy, true); - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular alignof expression. - Constant *C = ConstantExpr::getAlignOf(Ty); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - -/// Return a ConstantExpr with type DestTy for offsetof on Ty and FieldNo, with -/// any known factors factored out. If Folded is false, return null if no -/// factoring was possible, to avoid endlessly bouncing an unfoldable expression -/// back into the top-level folder. -static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy, - bool Folded) { - if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, - DestTy, false), - FieldNo, DestTy); - Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); - return ConstantExpr::getNUWMul(E, N); - } - - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!STy->isPacked()) { - unsigned NumElems = STy->getNumElements(); - // An empty struct has no members. - if (NumElems == 0) - return nullptr; - // Check for a struct with all members having the same size. 
- Constant *MemberSize = - getFoldedSizeOf(STy->getElementType(0), DestTy, true); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberSize != - getFoldedSizeOf(STy->getElementType(i), DestTy, true)) { - AllSame = false; - break; - } - if (AllSame) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, - false, - DestTy, - false), - FieldNo, DestTy); - return ConstantExpr::getNUWMul(MemberSize, N); - } - } - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular offsetof expression. - Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, Type *DestTy) { if (isa<PoisonValue>(V)) @@ -666,53 +472,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, // Is it a null pointer value? if (V->isNullValue()) return ConstantInt::get(DestTy, 0); - // If this is a sizeof-like expression, pull out multiplications by - // known factors to expose them to subsequent folding. If it's an - // alignof-like expression, factor out known factors. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::GetElementPtr && - CE->getOperand(0)->isNullValue()) { - // FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and - // getFoldedAlignOf() don't handle the case when DestTy is a vector of - // pointers yet. We end up in asserts in CastInst::getCastOpcode (see - // test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this - // happen in one "real" C-code test case, so it does not seem to be an - // important optimization to handle vectors here. For now, simply bail - // out. 
- if (DestTy->isVectorTy()) - return nullptr; - GEPOperator *GEPO = cast<GEPOperator>(CE); - Type *Ty = GEPO->getSourceElementType(); - if (CE->getNumOperands() == 2) { - // Handle a sizeof-like expression. - Constant *Idx = CE->getOperand(1); - bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne(); - if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) { - Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true, - DestTy, false), - Idx, DestTy); - return ConstantExpr::getMul(C, Idx); - } - } else if (CE->getNumOperands() == 3 && - CE->getOperand(1)->isNullValue()) { - // Handle an alignof-like expression. - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!STy->isPacked()) { - ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2)); - if (CI->isOne() && - STy->getNumElements() == 2 && - STy->getElementType(0)->isIntegerTy(1)) { - return getFoldedAlignOf(STy->getElementType(1), DestTy, false); - } - } - // Handle an offsetof-like expression. - if (Ty->isStructTy() || Ty->isArrayTy()) { - if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2), - DestTy, false)) - return C; - } - } - } // Other pointer types cannot be casted return nullptr; case Instruction::UIToFP: diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 7c18dc0ed299..63ea41fba89a 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -114,6 +114,10 @@ GlobalValue *Module::getNamedValue(StringRef Name) const { return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name)); } +unsigned Module::getNumNamedValues() const { + return getValueSymbolTable().size(); +} + /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. /// This ID is uniqued across modules in the current LLVMContext. 
unsigned Module::getMDKindID(StringRef Name) const { diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 8a4470ae207d..a0460062f307 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -366,6 +366,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader( if (GET_VERSION(Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); + BinaryIdsSize = swap(Header.BinaryIdsSize); CountersDelta = swap(Header.CountersDelta); NamesDelta = swap(Header.NamesDelta); auto DataSize = swap(Header.DataSize); @@ -374,7 +375,6 @@ Error RawInstrProfReader<IntPtrT>::readHeader( auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); NamesSize = swap(Header.NamesSize); ValueKindLast = swap(Header.ValueKindLast); - BinaryIdsSize = swap(Header.BinaryIdsSize); auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>); auto PaddingSize = getNumPaddingBytes(NamesSize); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ca6b87a5ebb0..b27a02b8c182 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4353,8 +4353,13 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, if (IsFixedLength) { assert(Subtarget->useSVEForFixedLengthVectors() && "Cannot lower when not using SVE for fixed vectors"); - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + } else { + MemVT = getContainerForFixedLengthVector(DAG, MemVT); + IndexVT = MemVT.changeTypeToInteger(); + } InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); Mask = 
DAG.getNode( ISD::ZERO_EXTEND, DL, @@ -4453,8 +4458,13 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, if (IsFixedLength) { assert(Subtarget->useSVEForFixedLengthVectors() && "Cannot lower when not using SVE for fixed vectors"); - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + } else { + MemVT = getContainerForFixedLengthVector(DAG, MemVT); + IndexVT = MemVT.changeTypeToInteger(); + } InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); StoreVal = diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b03d421d3e6d..091a62aa4ada 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, if (!MI.getOperand(1).isReg()) return false; + auto NormalizeCmpValue = [](int64_t Value) -> int { + // Comparison immediates may be 64-bit, but CmpValue is only an int. + // Normalize to 0/1/2 return value, where 2 indicates any value apart from + // 0 or 1. + // TODO: Switch CmpValue to int64_t in the API to avoid this. 
+ if (Value == 0 || Value == 1) + return Value; + return 2; + }; + switch (MI.getOpcode()) { default: break; @@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME: In order to convert CmpValue to 0 or 1 - CmpValue = MI.getOperand(2).getImm() != 0; + CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm()); return true; case AArch64::ANDSWri: case AArch64::ANDSXri: @@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME:The return val type of decodeLogicalImmediate is uint64_t, - // while the type of CmpValue is int. When converting uint64_t to int, - // the high 32 bits of uint64_t will be lost. - // In fact it causes a bug in spec2006-483.xalancbmk - // CmpValue is only used to compare with zero in OptimizeCompareInstr - CmpValue = AArch64_AM::decodeLogicalImmediate( + CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate( MI.getOperand(2).getImm(), - MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; + MI.getOpcode() == AArch64::ANDSWri ? 32 : 64)); return true; } @@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (CmpInstr.getOpcode() == AArch64::PTEST_PP) return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); - // Continue only if we have a "ri" where immediate is zero. - // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare - // function. - assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); + // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1. 
+ assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) && + "CmpValue must be 0, 1, or 2!"); if (SrcReg2 != 0) return false; @@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) return false; - if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) + if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) return true; - return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); + return (CmpValue == 0 || CmpValue == 1) && + removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); } /// Get opcode of S version of Instr. diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2167ad5d7467..e68a3aa8bf47 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1647,7 +1647,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, "CMP_SWAP not expected to be custom expanded for Thumb1"); assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) && "ARMv8-M.baseline does not have t2UXTB/t2UXTH"); - assert(ARM::tGPRRegClass.contains(DesiredReg) && + assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) && "DesiredReg used for UXT op must be tGPR"); } diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 3bc5556a62f4..417e8b6ffec3 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -54,6 +54,24 @@ public: return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } + + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = 
TTI::OP_None, + ArrayRef<const Value *> Args = ArrayRef<const Value *>(), + const Instruction *CxtI = nullptr) { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput) + return SCEVCheapExpansionBudget.getValue() + 1; + + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, + Opd2PropInfo); + } }; } // end namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index d5a7873bd056..abf5b213bbac 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, case Intrinsic::experimental_constrained_sin: case Intrinsic::experimental_constrained_cos: return true; + // There is no corresponding FMA instruction for PPC double double. + // Thus, we need to disable CTR loop generation for this type. + case Intrinsic::fmuladd: case Intrinsic::copysign: if (CI->getArgOperand(0)->getType()->getScalarType()-> isPPC_FP128Ty()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 342497150d49..8af3c8f5cfdb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -78,6 +78,39 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT, } //===----------------------------------------------------------------------===// +// Scheduling definitions. 
+//===----------------------------------------------------------------------===// + +class VMVRSched<int n>: Sched <[!cast<SchedReadWrite>("WriteVMov" # n # "V"), + !cast<SchedReadWrite>("ReadVMov" # n # "V")]>; + +class VLESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDE" # n), + ReadVLDX, ReadVMask]>; + +class VSESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTE" # n), + !cast<SchedReadWrite>("ReadVSTE" # n # "V"), + ReadVSTX, ReadVMask]>; + +class VLSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDS" # n), + ReadVLDX, ReadVLDSX, ReadVMask]>; + +class VSSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTS" # n), + !cast<SchedReadWrite>("ReadVSTS" # n # "V"), + ReadVSTX, ReadVSTSX, ReadVMask]>; + +class VLXSched<int n, string o> : + Sched <[!cast<SchedReadWrite>("WriteVLD" # o # "X" # n), + ReadVLDX, !cast<SchedReadWrite>("ReadVLD" # o # "XV"), ReadVMask]>; + +class VSXSched<int n, string o> : + Sched <[!cast<SchedReadWrite>("WriteVST" # o # "X" # n), + !cast<SchedReadWrite>("ReadVST" # o # "X" # n), + ReadVSTX, !cast<SchedReadWrite>("ReadVST" # o # "XV"), ReadVMask]>; + +class VLFSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDFF" # n), + ReadVLDX, ReadVMask]>; + +//===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -328,106 +361,417 @@ class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> // Use these multiclasses to define instructions more easily. //===----------------------------------------------------------------------===// multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { - def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">; - def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">; - def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>; + def V : VALUVV<funct6, OPIVV, opcodestr # "." 
# vw # "v">, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; } multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">; - def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">; + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; } -multiclass VALUr_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUrVV<funct6, OPIVV, opcodestr # "." # vw # "v">; - def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">; +multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; } -multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { - def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">; - def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>; +multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." 
# vw # "x">, + Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; } -multiclass VALU_IV_V<string opcodestr, bits<6> funct6> { - def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">; +multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>; + def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>; } -multiclass VALUr_IV_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">; +multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>; + def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; } -multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">; - def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">; +multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def X : VALUrVX<funct6, OPMVX, opcodestr # "." 
# vw # "x">, + Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; +} + +multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; +} + +multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> { + def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; + def IM : VALUmVI<funct6, opcodestr # ".vim">, + Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>; } -multiclass VALU_MV_V<string opcodestr, bits<6> funct6> { - def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">; +multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> { + def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, + Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, + Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + def IM : VALUmVI<funct6, opcodestr # ".vim">, + Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; } -multiclass VALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { - def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">; +multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> { + def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; } -multiclass VALU_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def X : VALUVX<funct6, OPMVX, opcodestr # "." 
# vw # "x">; +multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> { + def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>; + def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>; + def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>, + Sched<[WriteVICALUI, ReadVIALUCV]>; } -multiclass VALUr_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">; - def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">; +multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> { + def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>; + def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>; } -multiclass VALUr_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { - def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">; +multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; } -multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { - def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>; +multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." 
# vw # "f">, + Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; } -multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> { - def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">; - def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">; - def IM : VALUmVI<funct6, opcodestr # ".vim">; +multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>; } -multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> { - def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">; - def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">; +multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>; } -multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> { - def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">; - def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">; - def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>; +multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; } -multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> { - def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">; - def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">; +multiclass VRDIV_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." 
# vw # "f">, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; } -multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">; - def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">; +multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>; } -multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { - def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">; +multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>; + def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>; +} + +multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>; + def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>; +} + +multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; +} + +multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; +} + +multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." 
# vw # "v">, + Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; +} + +multiclass VCMP_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; +} + +multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">, + Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>; + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>; +} + +multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; +} + +multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>; +} + +multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>; +} + +multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>; +} + +multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>; +} + +multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>; +} + +multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFNCvtIToFV, 
ReadVFNCvtIToFV, ReadVMask]>; +} + +multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>; +} + +multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>, + Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>; +} + +multiclass VRED_MV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">, + Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV0, ReadVMask]>; +} + +multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">, + Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV0, ReadVMask]>; +} + +multiclass VRED_FV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV0, ReadVMask]>; +} + +multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV0, ReadVMask]>; +} + +multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV0, ReadVMask]>; +} + +multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> { + def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">, + Sched<[WriteVFWRedOV, ReadVFWRedOV, ReadVFWRedOV0, ReadVMask]>; } -multiclass VALUr_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> { - def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">; - def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">; +multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { + def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." 
# vm # "m">, + Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>; } -multiclass VALU_FV_V<string opcodestr, bits<6> funct6> { - def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">; +multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, + Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>; } -multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> { - def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>; +multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> { + def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>, + Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; +} + +multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; +} + +multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>; +} + +multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." 
# vw # "i", optype>, + Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; +} + +multiclass VCMP_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; +} + +multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; +} + +multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; +} + +multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>; +} + +multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; +} + +multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." 
# vw # "x">, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>; +} + +multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; +} + +multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">, + Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>; + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>; +} + +multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>; +} + +multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>; +} + +multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." 
# vw # "x">, + Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>; +} + +multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVISlideI, ReadVISlideV, ReadVMask]>; +} + +multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> { + def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">, + Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>; +} + +multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> { + def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">, + Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>; +} + +multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>; + def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">, + Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>; + def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>, + Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>; +} + +multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { + def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." 
# vm # "m">, + Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>; } multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> { @@ -435,11 +779,48 @@ multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> { def _UNWD : VAMONoWd<amoop, width, opcodestr>; } -multiclass VWholeLoad<bits<3> nf, string opcodestr, RegisterClass VRC> { - def E8_V : VWholeLoad<nf, LSWidth8, opcodestr # "e8.v", VRC>; - def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v", VRC>; - def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v", VRC>; - def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>; +multiclass VWholeLoad1<string opcodestr, RegisterClass VRC> { + def E8_V : VWholeLoad<0, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD1R8, ReadVLDX]>; + def E16_V : VWholeLoad<0, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD1R16, ReadVLDX]>; + def E32_V : VWholeLoad<0, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD1R32, ReadVLDX]>; + def E64_V : VWholeLoad<0, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD1R64, ReadVLDX]>; +} + +multiclass VWholeLoad2<string opcodestr, RegisterClass VRC> { + def E8_V : VWholeLoad<1, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD2R8, ReadVLDX]>; + def E16_V : VWholeLoad<1, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD2R16, ReadVLDX]>; + def E32_V : VWholeLoad<1, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD2R32, ReadVLDX]>; + def E64_V : VWholeLoad<1, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD2R64, ReadVLDX]>; +} + +multiclass VWholeLoad4<string opcodestr, RegisterClass VRC> { + def E8_V : VWholeLoad<3, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD4R8, ReadVLDX]>; + def E16_V : VWholeLoad<3, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD4R16, ReadVLDX]>; + def E32_V : VWholeLoad<3, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD4R32, ReadVLDX]>; + def E64_V : VWholeLoad<3, LSWidth64, opcodestr # "e64.v", VRC>, + 
Sched<[WriteVLD4R64, ReadVLDX]>; +} + +multiclass VWholeLoad8<string opcodestr, RegisterClass VRC> { + def E8_V : VWholeLoad<7, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD8R8, ReadVLDX]>; + def E16_V : VWholeLoad<7, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD8R16, ReadVLDX]>; + def E32_V : VWholeLoad<7, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD8R32, ReadVLDX]>; + def E64_V : VWholeLoad<7, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD8R64, ReadVLDX]>; } //===----------------------------------------------------------------------===// @@ -459,69 +840,94 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 // Vector Unit-Stride Instructions -def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">; -def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">; -def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">; -def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">; - -def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">; -def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">; -def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">; -def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">; - -def VLE1_V : VUnitStrideLoadMask<"vle1.v">; -def VSE1_V : VUnitStrideStoreMask<"vse1.v">; - -def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">; -def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">; -def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">; -def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">; +def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">, + VLESched<8>; +def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">, + VLESched<16>; +def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">, + VLESched<32>; +def VLE64_V : 
VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">, + VLESched<64>; + +def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">, + VLFSched<8>; +def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">, + VLFSched<16>; +def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">, + VLFSched<32>; +def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">, + VLFSched<64>; + +def VLE1_V : VUnitStrideLoadMask<"vle1.v">, + Sched<[WriteVLDM, ReadVLDX]>; +def VSE1_V : VUnitStrideStoreMask<"vse1.v">, + Sched<[WriteVSTM, ReadVSTM, ReadVSTX]>; + +def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">, + VSESched<8>; +def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">, + VSESched<16>; +def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">, + VSESched<32>; +def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">, + VSESched<64>; // Vector Strided Instructions -def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">; -def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">; -def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">; -def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">; - -def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">; -def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">; -def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">; -def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">; +def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">, + VLSSched<8>; +def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">, + VLSSched<16>; +def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">, + VLSSched<32>; +def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">, + VLSSched<64>; + +def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">, + VSSSched<8>; +def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">, + VSSSched<16>; +def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">, + VSSSched<32>; +def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">, + VSSSched<64>; // Vector Indexed Instructions -def 
VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">; -def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">; -def VLUXEI32_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth32, "vluxei32.v">; -def VLUXEI64_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth64, "vluxei64.v">; - -def VLOXEI8_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth8, "vloxei8.v">; -def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">; -def VLOXEI32_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth32, "vloxei32.v">; -def VLOXEI64_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth64, "vloxei64.v">; - -def VSUXEI8_V : VIndexedStore<MOPSTIndexedUnord, LSWidth8, "vsuxei8.v">; -def VSUXEI16_V : VIndexedStore<MOPSTIndexedUnord, LSWidth16, "vsuxei16.v">; -def VSUXEI32_V : VIndexedStore<MOPSTIndexedUnord, LSWidth32, "vsuxei32.v">; -def VSUXEI64_V : VIndexedStore<MOPSTIndexedUnord, LSWidth64, "vsuxei64.v">; - -def VSOXEI8_V : VIndexedStore<MOPSTIndexedOrder, LSWidth8, "vsoxei8.v">; -def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">; -def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">; -def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">; - -defm VL1R : VWholeLoad<0, "vl1r", VR>; -defm VL2R : VWholeLoad<1, "vl2r", VRM2>; -defm VL4R : VWholeLoad<3, "vl4r", VRM4>; -defm VL8R : VWholeLoad<7, "vl8r", VRM8>; +foreach n = [8, 16, 32, 64] in { +defvar w = !cast<RISCVWidth>("LSWidth" # n); + +def VLUXEI # n # _V : + VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">, + VLXSched<n, "U">; +def VLOXEI # n # _V : + VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">, + VLXSched<n, "O">; + +def VSUXEI # n # _V : + VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">, + VSXSched<n, "U">; +def VSOXEI # n # _V : + VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">, + VSXSched<n, "O">; +} + +defm VL1R : VWholeLoad1<"vl1r", VR>; +defm VL2R : VWholeLoad2<"vl2r", VRM2>; +defm VL4R : VWholeLoad4<"vl4r", VRM4>; 
+defm VL8R : VWholeLoad8<"vl8r", VRM8>; + def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>; def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>; def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>; -def VS1R_V : VWholeStore<0, "vs1r.v", VR>; -def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>; -def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>; -def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>; +def VS1R_V : VWholeStore<0, "vs1r.v", VR>, + Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>; +def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>, + Sched<[WriteVST2R, ReadVST2R, ReadVSTX]>; +def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>, + Sched<[WriteVST4R, ReadVST4R, ReadVSTX]>; +def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>, + Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>; // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; @@ -588,9 +994,9 @@ def : InstAlias<"vnot.v $vd, $vs$vm", (VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>; // Vector Single-Width Bit Shift Instructions -defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>; -defm VSRL_V : VALU_IV_V_X_I<"vsrl", 0b101000, uimm5>; -defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>; +defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>; +defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000, uimm5>; +defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>; // Vector Narrowing Integer Right Shift Instructions // Refer to 11.3. Narrowing Vector Arithmetic Instructions @@ -598,8 +1004,8 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>; // vector register group (specified by vs2). The destination vector register // group cannot overlap the mask register if used, unless LMUL=1. 
let Constraints = "@earlyclobber $vd" in { -defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">; -defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; +defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">; +defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; } // Constraints = "@earlyclobber $vd" def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", @@ -607,14 +1013,14 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", // Vector Integer Comparison Instructions let RVVConstraint = NoConstraint in { -defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>; -defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>; -defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>; -defm VMSLT_V : VALU_IV_V_X<"vmslt", 0b011011>; -defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>; -defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>; -defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>; -defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>; +defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>; +defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>; +defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>; +defm VMSLT_V : VCMP_IV_V_X<"vmslt", 0b011011>; +defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>; +defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>; +defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>; +defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>; } // RVVConstraint = NoConstraint def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm", @@ -672,84 +1078,87 @@ def PseudoVMSGE_VX_M_T : Pseudo<(outs VR:$vd, VRNoV0:$scratch), } // Vector Integer Min/Max Instructions -defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>; -defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>; -defm VMAXU_V : VALU_IV_V_X<"vmaxu", 0b000110>; -defm VMAX_V : VALU_IV_V_X<"vmax", 0b000111>; +defm VMINU_V : VCMP_IV_V_X<"vminu", 0b000100>; +defm VMIN_V : VCMP_IV_V_X<"vmin", 0b000101>; +defm VMAXU_V : VCMP_IV_V_X<"vmaxu", 0b000110>; +defm VMAX_V : VCMP_IV_V_X<"vmax", 0b000111>; // Vector Single-Width Integer Multiply Instructions -defm VMUL_V : VALU_MV_V_X<"vmul", 0b100101>; 
-defm VMULH_V : VALU_MV_V_X<"vmulh", 0b100111>; -defm VMULHU_V : VALU_MV_V_X<"vmulhu", 0b100100>; -defm VMULHSU_V : VALU_MV_V_X<"vmulhsu", 0b100110>; +defm VMUL_V : VMUL_MV_V_X<"vmul", 0b100101>; +defm VMULH_V : VMUL_MV_V_X<"vmulh", 0b100111>; +defm VMULHU_V : VMUL_MV_V_X<"vmulhu", 0b100100>; +defm VMULHSU_V : VMUL_MV_V_X<"vmulhsu", 0b100110>; // Vector Integer Divide Instructions -defm VDIVU_V : VALU_MV_V_X<"vdivu", 0b100000>; -defm VDIV_V : VALU_MV_V_X<"vdiv", 0b100001>; -defm VREMU_V : VALU_MV_V_X<"vremu", 0b100010>; -defm VREM_V : VALU_MV_V_X<"vrem", 0b100011>; +defm VDIVU_V : VDIV_MV_V_X<"vdivu", 0b100000>; +defm VDIV_V : VDIV_MV_V_X<"vdiv", 0b100001>; +defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>; +defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>; // Vector Widening Integer Multiply Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VWMUL_V : VALU_MV_V_X<"vwmul", 0b111011>; -defm VWMULU_V : VALU_MV_V_X<"vwmulu", 0b111000>; -defm VWMULSU_V : VALU_MV_V_X<"vwmulsu", 0b111010>; +defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>; +defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>; +defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Single-Width Integer Multiply-Add Instructions -defm VMACC_V : VALUr_MV_V_X<"vmacc", 0b101101>; -defm VNMSAC_V : VALUr_MV_V_X<"vnmsac", 0b101111>; -defm VMADD_V : VALUr_MV_V_X<"vmadd", 0b101001>; -defm VNMSUB_V : VALUr_MV_V_X<"vnmsub", 0b101011>; +defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>; +defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>; +defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>; +defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>; // Vector Widening Integer Multiply-Add Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VWMACCU_V : VALUr_MV_V_X<"vwmaccu", 0b111100>; -defm VWMACC_V : VALUr_MV_V_X<"vwmacc", 0b111101>; -defm VWMACCSU_V : VALUr_MV_V_X<"vwmaccsu", 0b111111>; -defm VWMACCUS_V : 
VALUr_MV_X<"vwmaccus", 0b111110>; +defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>; +defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>; +defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>; +defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Integer Merge Instructions -defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>; +defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>; // Vector Integer Move Instructions let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1, RVVConstraint = NoConstraint in { // op vd, vs1 def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd), - (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">; + (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">, + Sched<[WriteVIMovV, ReadVIMovV]>; // op vd, rs1 def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd), - (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">; + (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">, + Sched<[WriteVIMovX, ReadVIMovX]>; // op vd, imm def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd), - (ins simm5:$imm), "vmv.v.i", "$vd, $imm">; + (ins simm5:$imm), "vmv.v.i", "$vd, $imm">, + Sched<[WriteVIMovI]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Vector Fixed-Point Arithmetic Instructions -defm VSADDU_V : VALU_IV_V_X_I<"vsaddu", 0b100000>; -defm VSADD_V : VALU_IV_V_X_I<"vsadd", 0b100001>; -defm VSSUBU_V : VALU_IV_V_X<"vssubu", 0b100010>; -defm VSSUB_V : VALU_IV_V_X<"vssub", 0b100011>; +defm VSADDU_V : VSALU_IV_V_X_I<"vsaddu", 0b100000>; +defm VSADD_V : VSALU_IV_V_X_I<"vsadd", 0b100001>; +defm VSSUBU_V : VSALU_IV_V_X<"vssubu", 0b100010>; +defm VSSUB_V : VSALU_IV_V_X<"vssub", 0b100011>; // Vector Single-Width Averaging Add and Subtract -defm VAADDU_V : VALU_MV_V_X<"vaaddu", 0b001000>; -defm VAADD_V : VALU_MV_V_X<"vaadd", 0b001001>; -defm VASUBU_V : VALU_MV_V_X<"vasubu", 0b001010>; -defm VASUB_V : VALU_MV_V_X<"vasub", 0b001011>; +defm VAADDU_V : VAALU_MV_V_X<"vaaddu", 0b001000>; +defm VAADD_V : VAALU_MV_V_X<"vaadd", 0b001001>; +defm 
VASUBU_V : VAALU_MV_V_X<"vasubu", 0b001010>; +defm VASUB_V : VAALU_MV_V_X<"vasub", 0b001011>; // Vector Single-Width Fractional Multiply with Rounding and Saturation -defm VSMUL_V : VALU_IV_V_X<"vsmul", 0b100111>; +defm VSMUL_V : VSMUL_IV_V_X<"vsmul", 0b100111>; // Vector Single-Width Scaling Shift Instructions -defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>; -defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>; +defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010, uimm5>; +defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011, uimm5>; // Vector Narrowing Fixed-Point Clip Instructions let Constraints = "@earlyclobber $vd" in { -defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">; -defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; +defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">; +defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; } // Constraints = "@earlyclobber $vd" } // Predicates = [HasStdExtV] @@ -762,60 +1171,60 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; // Vector Widening Floating-Point Add/Subtract Instructions let Constraints = "@earlyclobber $vd" in { let RVVConstraint = WidenV in { -defm VFWADD_V : VALU_FV_V_F<"vfwadd", 0b110000>; -defm VFWSUB_V : VALU_FV_V_F<"vfwsub", 0b110010>; +defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>; +defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>; } // RVVConstraint = WidenV // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. 
let RVVConstraint = WidenW in { -defm VFWADD_W : VALU_FV_V_F<"vfwadd", 0b110100, "w">; -defm VFWSUB_W : VALU_FV_V_F<"vfwsub", 0b110110, "w">; +defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">; +defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">; } // RVVConstraint = WidenW } // Constraints = "@earlyclobber $vd" // Vector Single-Width Floating-Point Multiply/Divide Instructions -defm VFMUL_V : VALU_FV_V_F<"vfmul", 0b100100>; -defm VFDIV_V : VALU_FV_V_F<"vfdiv", 0b100000>; -defm VFRDIV_V : VALU_FV_F<"vfrdiv", 0b100001>; +defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>; +defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>; +defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>; // Vector Widening Floating-Point Multiply let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VFWMUL_V : VALU_FV_V_F<"vfwmul", 0b111000>; +defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Single-Width Floating-Point Fused Multiply-Add Instructions -defm VFMACC_V : VALUr_FV_V_F<"vfmacc", 0b101100>; -defm VFNMACC_V : VALUr_FV_V_F<"vfnmacc", 0b101101>; -defm VFMSAC_V : VALUr_FV_V_F<"vfmsac", 0b101110>; -defm VFNMSAC_V : VALUr_FV_V_F<"vfnmsac", 0b101111>; -defm VFMADD_V : VALUr_FV_V_F<"vfmadd", 0b101000>; -defm VFNMADD_V : VALUr_FV_V_F<"vfnmadd", 0b101001>; -defm VFMSUB_V : VALUr_FV_V_F<"vfmsub", 0b101010>; -defm VFNMSUB_V : VALUr_FV_V_F<"vfnmsub", 0b101011>; +defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>; +defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>; +defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>; +defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>; +defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>; +defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>; +defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>; +defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; // Vector Widening Floating-Point Fused Multiply-Add Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VFWMACC_V : 
VALUr_FV_V_F<"vfwmacc", 0b111100>; -defm VFWNMACC_V : VALUr_FV_V_F<"vfwnmacc", 0b111101>; -defm VFWMSAC_V : VALUr_FV_V_F<"vfwmsac", 0b111110>; -defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>; +defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; +defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; +defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; +defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Floating-Point Square-Root Instruction -defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; -defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; -defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; +defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; +defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; +defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; // Vector Floating-Point MIN/MAX Instructions -defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>; -defm VFMAX_V : VALU_FV_V_F<"vfmax", 0b000110>; +defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>; +defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>; // Vector Floating-Point Sign-Injection Instructions -defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>; -defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>; -defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>; +defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>; +defm VFSGNJN_V : VSGNJ_FV_V_F<"vfsgnjn", 0b001001>; +defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>; def : InstAlias<"vfneg.v $vd, $vs$vm", (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>; @@ -824,12 +1233,12 @@ def : InstAlias<"vfabs.v $vd, $vs$vm", // Vector Floating-Point Compare Instructions let RVVConstraint = NoConstraint in { -defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>; -defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>; -defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>; -defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>; -defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>; -defm VMFGE_V : 
VALU_FV_F<"vmfge", 0b011111>; +defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>; +defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>; +defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>; +defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>; +defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>; +defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>; } // RVVConstraint = NoConstraint def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm", @@ -838,68 +1247,70 @@ def : InstAlias<"vmfge.vv $vd, $va, $vb$vm", (VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; // Vector Floating-Point Classify Instruction -defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>; +defm VFCLASS_V : VCLS_FV_VS2<"vfclass.v", 0b010011, 0b10000>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + // Vector Floating-Point Merge Instruction +let vm = 0 in def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd), (ins VR:$vs2, FPR32:$rs1, VMV0:$v0), - "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> { - let vm = 0; -} + "vfmerge.vfm", "$vd, $vs2, $rs1, v0">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; // Vector Floating-Point Move Instruction let RVVConstraint = NoConstraint in +let vm = 1, vs2 = 0 in def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd), - (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> { - let vs2 = 0; - let vm = 1; -} + (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">, + Sched<[WriteVFMovV, ReadVFMovF]>; + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Single-Width Floating-Point/Integer Type-Convert Instructions -defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; -defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; -defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; -defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; -defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; -defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; +defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; +defm 
VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; +defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; +defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; +defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; +defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in { -defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; -defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; -defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; -defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; -defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; -defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; -defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; +defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; +defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; +defm VFWCVT_RTZ_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; +defm VFWCVT_RTZ_X_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; +defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; +defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; +defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt // Narrowing Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd" in { -defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; -defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; -defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; -defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; -defm VFNCVT_F_XU_W : 
VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; -defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; -defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; -defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; +defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; +defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; +defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; +defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; +defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; +defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; +defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; +defm VFNCVT_ROD_F_F_W : VNCVTF_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; } // Constraints = "@earlyclobber $vd" } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { + // Vector Single-Width Integer Reduction Instructions let RVVConstraint = NoConstraint in { -defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>; -defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>; -defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>; -defm VREDMINU : VALU_MV_V<"vredminu", 0b000100>; -defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>; -defm VREDAND : VALU_MV_V<"vredand", 0b000001>; -defm VREDOR : VALU_MV_V<"vredor", 0b000010>; -defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>; +defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>; +defm VREDMAXU : VRED_MV_V<"vredmaxu", 0b000110>; +defm VREDMAX : VRED_MV_V<"vredmax", 0b000111>; +defm VREDMINU : VRED_MV_V<"vredminu", 0b000100>; +defm VREDMIN : VRED_MV_V<"vredmin", 0b000101>; +defm VREDAND : VRED_MV_V<"vredand", 0b000001>; +defm VREDOR : VRED_MV_V<"vredor", 0b000010>; +defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>; } // RVVConstraint = NoConstraint // Vector Widening Integer Reduction Instructions @@ -908,18 +1319,19 @@ let Constraints = "@earlyclobber $vd", 
RVVConstraint = NoConstraint in { // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. -defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>; -defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>; +defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>; +defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint + } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { // Vector Single-Width Floating-Point Reduction Instructions let RVVConstraint = NoConstraint in { -defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>; -defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>; -defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>; -defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>; +defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>; +defm VFREDSUM : VRED_FV_V<"vfredsum", 0b000001>; +defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>; +defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>; } // RVVConstraint = NoConstraint // Vector Widening Floating-Point Reduction Instructions @@ -928,22 +1340,22 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. 
-defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>; -defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>; +defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>; +defm VFWREDSUM : VWRED_FV_V<"vfwredsum", 0b110001>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Mask-Register Logical Instructions let RVVConstraint = NoConstraint in { -defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">; -defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">; -defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">; -defm VMXOR_M : VALU_MV_Mask<"vmxor", 0b011011, "m">; -defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">; -defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">; -defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">; -defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">; +defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">; +defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">; +defm VMANDNOT_M : VMALU_MV_Mask<"vmandnot", 0b011000, "m">; +defm VMXOR_M : VMALU_MV_Mask<"vmxor", 0b011011, "m">; +defm VMOR_M : VMALU_MV_Mask<"vmor", 0b011010, "m">; +defm VMNOR_M : VMALU_MV_Mask<"vmnor", 0b011110, "m">; +defm VMORNOT_M : VMALU_MV_Mask<"vmornot", 0b011100, "m">; +defm VMXNOR_M : VMALU_MV_Mask<"vmxnor", 0b011111, "m">; } def : InstAlias<"vmmv.m $vd, $vs", @@ -957,98 +1369,113 @@ def : InstAlias<"vmnot.m $vd, $vs", let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { + // Vector mask population count vpopc def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd), - (ins VR:$vs2, VMaskOp:$vm), - "vpopc.m", "$vd, $vs2$vm">; + (ins VR:$vs2, VMaskOp:$vm), + "vpopc.m", "$vd, $vs2$vm">, + Sched<[WriteVMPopV, ReadVMPopV, ReadVMask]>; // vfirst find-first-set mask bit def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), - (ins VR:$vs2, VMaskOp:$vm), - "vfirst.m", "$vd, $vs2$vm">; + (ins VR:$vs2, VMaskOp:$vm), + "vfirst.m", 
"$vd, $vs2$vm">, + Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMask]>; + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in { + // vmsbf.m set-before-first mask bit -defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>; +defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>; // vmsif.m set-including-first mask bit -defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>; +defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>; // vmsof.m set-only-first mask bit -defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>; +defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>; // Vector Iota Instruction -defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>; +defm VIOTA_M : VMIOT_MV_V<"viota.m", 0b010100, 0b10000>; + } // Constraints = "@earlyclobber $vd", RVVConstraint = Iota // Vector Element Index Instruction let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + +let vs2 = 0 in def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd), - (ins VMaskOp:$vm), "vid.v", "$vd$vm"> { - let vs2 = 0; -} + (ins VMaskOp:$vm), "vid.v", "$vd$vm">, + Sched<[WriteVMIdxV, ReadVMask]>; // Integer Scalar Move Instructions let vm = 1, RVVConstraint = NoConstraint in { def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd), - (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">; + (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">, + Sched<[WriteVIMovVX, ReadVIMovVX]>; let Constraints = "$vd = $vd_wb" in def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb), - (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">; - + (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">, + Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>; } + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 + } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1, RVVConstraint = NoConstraint in { // Floating-Point Scalar Move Instructions def VFMV_F_S : RVInstV<0b010000, 
0b00000, OPFVV, (outs FPR32:$vd), - (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">; + (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">, + Sched<[WriteVFMovVF, ReadVFMovVF]>; let Constraints = "$vd = $vd_wb" in def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb), - (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">; + (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">, + Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1 + } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Slide Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { -defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>; -defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>; +defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>; +defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>; -defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>; +defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>; +defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>; } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { -defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>; +defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>; +defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>; } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Register Gather Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { -defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>; -def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">; +defm VRGATHER_V : 
VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>; +def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather // Vector Compress Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in { -defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>; +defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { -foreach nf = [1, 2, 4, 8] in { - def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd), - (ins VR:$vs2), "vmv" # nf # "r.v", - "$vd, $vs2"> { - let Uses = []; - let vm = 1; - } +foreach n = [1, 2, 4, 8] in { + def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd), + (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">, + VMVRSched<n> { + let Uses = []; + let vm = 1; +} } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 } // Predicates = [HasStdExtV] diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index ed26a5026114..14f59152ed42 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -231,6 +231,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>; def : ReadAdvance<ReadFClass32, 0>; def : ReadAdvance<ReadFClass64, 0>; +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZfh; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 314af180aca1..75ca6ca861be 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -219,6 +219,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>; def : ReadAdvance<ReadFClass32, 0>; 
def : ReadAdvance<ReadFClass64, 0>; +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZfh; diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index f31e4af46c1b..4971ca1d4e3e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -230,3 +230,4 @@ def : ReadAdvance<ReadFSqrt16, 0>; // Include the scheduler resources for other instruction extensions. include "RISCVScheduleB.td" +include "RISCVScheduleV.td" diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td new file mode 100644 index 000000000000..43af1802d706 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -0,0 +1,820 @@ +//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +/// Define scheduler resources associated with def operands. + +// 7. Vector Loads and Stores +// 7.4. Vector Unit-Stride Instructions +def WriteVLDE8 : SchedWrite; +def WriteVLDE16 : SchedWrite; +def WriteVLDE32 : SchedWrite; +def WriteVLDE64 : SchedWrite; +def WriteVSTE8 : SchedWrite; +def WriteVSTE16 : SchedWrite; +def WriteVSTE32 : SchedWrite; +def WriteVSTE64 : SchedWrite; +// 7.4.1. Vector Unit-Strided Mask +def WriteVLDM : SchedWrite; +def WriteVSTM : SchedWrite; +// 7.5. 
Vector Strided Instructions +def WriteVLDS8 : SchedWrite; +def WriteVLDS16 : SchedWrite; +def WriteVLDS32 : SchedWrite; +def WriteVLDS64 : SchedWrite; +def WriteVSTS8 : SchedWrite; +def WriteVSTS16 : SchedWrite; +def WriteVSTS32 : SchedWrite; +def WriteVSTS64 : SchedWrite; +// 7.6. Vector Indexed Instructions +def WriteVLDUX8 : SchedWrite; +def WriteVLDUX16 : SchedWrite; +def WriteVLDUX32 : SchedWrite; +def WriteVLDUX64 : SchedWrite; +def WriteVLDOX8 : SchedWrite; +def WriteVLDOX16 : SchedWrite; +def WriteVLDOX32 : SchedWrite; +def WriteVLDOX64 : SchedWrite; +def WriteVSTUX8 : SchedWrite; +def WriteVSTUX16 : SchedWrite; +def WriteVSTUX32 : SchedWrite; +def WriteVSTUX64 : SchedWrite; +def WriteVSTOX8 : SchedWrite; +def WriteVSTOX16 : SchedWrite; +def WriteVSTOX32 : SchedWrite; +def WriteVSTOX64 : SchedWrite; +// 7.7. Vector Unit-stride Fault-Only-First Loads +def WriteVLDFF8 : SchedWrite; +def WriteVLDFF16 : SchedWrite; +def WriteVLDFF32 : SchedWrite; +def WriteVLDFF64 : SchedWrite; +// 7.9. Vector Whole Register Instructions +def WriteVLD1R8 : SchedWrite; +def WriteVLD1R16 : SchedWrite; +def WriteVLD1R32 : SchedWrite; +def WriteVLD1R64 : SchedWrite; +def WriteVLD2R8 : SchedWrite; +def WriteVLD2R16 : SchedWrite; +def WriteVLD2R32 : SchedWrite; +def WriteVLD2R64 : SchedWrite; +def WriteVLD4R8 : SchedWrite; +def WriteVLD4R16 : SchedWrite; +def WriteVLD4R32 : SchedWrite; +def WriteVLD4R64 : SchedWrite; +def WriteVLD8R8 : SchedWrite; +def WriteVLD8R16 : SchedWrite; +def WriteVLD8R32 : SchedWrite; +def WriteVLD8R64 : SchedWrite; +def WriteVST1R : SchedWrite; +def WriteVST2R : SchedWrite; +def WriteVST4R : SchedWrite; +def WriteVST8R : SchedWrite; + +// 11. Vector Integer Arithmetic Instructions +// 11.1. Vector Single-Width Integer Add and Subtract +// 11.5. Vector Bitwise Logical Instructions +def WriteVIALUV : SchedWrite; +def WriteVIALUX : SchedWrite; +def WriteVIALUI : SchedWrite; +// 11.2. 
Vector Widening Integer Add/Subtract +def WriteVIWALUV : SchedWrite; +def WriteVIWALUX : SchedWrite; +def WriteVIWALUI : SchedWrite; +// 11.3. Vector Integer Extension +def WriteVExtV : SchedWrite; +// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions +def WriteVICALUV : SchedWrite; +def WriteVICALUX : SchedWrite; +def WriteVICALUI : SchedWrite; +// 11.6. Vector Single-Width Bit Shift Instructions +def WriteVShiftV : SchedWrite; +def WriteVShiftX : SchedWrite; +def WriteVShiftI : SchedWrite; +// 11.7. Vector Narrowing Integer Right Shift Instructions +def WriteVNShiftV : SchedWrite; +def WriteVNShiftX : SchedWrite; +def WriteVNShiftI : SchedWrite; +// 11.8. Vector Integer Comparison Instructions +// 11.9. Vector Integer Min/Max Instructions +def WriteVICmpV : SchedWrite; +def WriteVICmpX : SchedWrite; +def WriteVICmpI : SchedWrite; +// 11.10. Vector Single-Width Integer Multiply Instructions +def WriteVIMulV : SchedWrite; +def WriteVIMulX : SchedWrite; +// 11.11. Vector Integer Divide Instructions +def WriteVIDivV : SchedWrite; +def WriteVIDivX : SchedWrite; +// 11.12. Vector Widening Integer Multiply Instructions +def WriteVIWMulV : SchedWrite; +def WriteVIWMulX : SchedWrite; +// 11.13. Vector Single-Width Integer Multiply-Add Instructions +def WriteVIMulAddV : SchedWrite; +def WriteVIMulAddX : SchedWrite; +// 11.14. Vector Widening Integer Multiply-Add Instructions +def WriteVIWMulAddV : SchedWrite; +def WriteVIWMulAddX : SchedWrite; +// 11.15. Vector Integer Merge Instructions +def WriteVIMergeV : SchedWrite; +def WriteVIMergeX : SchedWrite; +def WriteVIMergeI : SchedWrite; +// 11.16. Vector Integer Move Instructions +def WriteVIMovV : SchedWrite; +def WriteVIMovX : SchedWrite; +def WriteVIMovI : SchedWrite; + +// 12. Vector Fixed-Point Arithmetic Instructions +// 12.1. Vector Single-Width Saturating Add and Subtract +def WriteVSALUV : SchedWrite; +def WriteVSALUX : SchedWrite; +def WriteVSALUI : SchedWrite; +// 12.2. 
Vector Single-Width Averaging Add and Subtract +def WriteVAALUV : SchedWrite; +def WriteVAALUX : SchedWrite; +// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +def WriteVSMulV : SchedWrite; +def WriteVSMulX : SchedWrite; +// 12.4. Vector Single-Width Scaling Shift Instructions +def WriteVSShiftV : SchedWrite; +def WriteVSShiftX : SchedWrite; +def WriteVSShiftI : SchedWrite; +// 12.5. Vector Narrowing Fixed-Point Clip Instructions +def WriteVNClipV : SchedWrite; +def WriteVNClipX : SchedWrite; +def WriteVNClipI : SchedWrite; + +// 13. Vector Floating-Point Instructions +// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions +def WriteVFALUV : SchedWrite; +def WriteVFALUF : SchedWrite; +// 13.3. Vector Widening Floating-Point Add/Subtract Instructions +def WriteVFWALUV : SchedWrite; +def WriteVFWALUF : SchedWrite; +// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +def WriteVFMulV : SchedWrite; +def WriteVFMulF : SchedWrite; +def WriteVFDivV : SchedWrite; +def WriteVFDivF : SchedWrite; +// 13.5. Vector Widening Floating-Point Multiply +def WriteVFWMulV : SchedWrite; +def WriteVFWMulF : SchedWrite; +// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +def WriteVFMulAddV : SchedWrite; +def WriteVFMulAddF : SchedWrite; +// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +def WriteVFWMulAddV : SchedWrite; +def WriteVFWMulAddF : SchedWrite; +// 13.8. Vector Floating-Point Square-Root Instruction +def WriteVFSqrtV : SchedWrite; +// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction +// 13.10. Vector Floating-Point Reciprocal Estimate Instruction +def WriteVFRecpV : SchedWrite; +// 13.11. Vector Floating-Point MIN/MAX Instructions +// 13.13. Vector Floating-Point Compare Instructions +def WriteVFCmpV : SchedWrite; +def WriteVFCmpF : SchedWrite; +// 13.12. 
Vector Floating-Point Sign-Injection Instructions +def WriteVFSgnjV : SchedWrite; +def WriteVFSgnjF : SchedWrite; +// 13.14. Vector Floating-Point Classify Instruction +def WriteVFClassV : SchedWrite; +// 13.15. Vector Floating-Point Merge Instruction +def WriteVFMergeV : SchedWrite; +// 13.16. Vector Floating-Point Move Instruction +def WriteVFMovV : SchedWrite; +// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions +def WriteVFCvtIToFV : SchedWrite; +def WriteVFCvtFToIV : SchedWrite; +def WriteVFCvtFToFV : SchedWrite; +// 13.18. Widening Floating-Point/Integer Type-Convert Instructions +def WriteVFWCvtIToFV : SchedWrite; +def WriteVFWCvtFToIV : SchedWrite; +def WriteVFWCvtFToFV : SchedWrite; +// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions +def WriteVFNCvtIToFV : SchedWrite; +def WriteVFNCvtFToIV : SchedWrite; +def WriteVFNCvtFToFV : SchedWrite; + +// 14. Vector Reduction Operations +// 14.1. Vector Single-Width Integer Reduction Instructions +def WriteVIRedV : SchedWrite; +// 14.2. Vector Widening Integer Reduction Instructions +def WriteVIWRedV : SchedWrite; +// 14.3. Vector Single-Width Floating-Point Reduction Instructions +def WriteVFRedV : SchedWrite; +def WriteVFRedOV : SchedWrite; +// 14.4. Vector Widening Floating-Point Reduction Instructions +def WriteVFWRedV : SchedWrite; +def WriteVFWRedOV : SchedWrite; + +// 15. Vector Mask Instructions +// 15.1. Vector Mask-Register Logical Instructions +def WriteVMALUV : SchedWrite; +// 15.2. Vector Mask Population Count +def WriteVMPopV : SchedWrite; +// 15.3. Vector Find-First-Set Mask Bit +def WriteVMFFSV : SchedWrite; +// 15.4. Vector Set-Before-First Mask Bit +// 15.5. Vector Set-Including-First Mask Bit +// 15.6. Vector Set-only-First Mask Bit +def WriteVMSFSV : SchedWrite; +// 15.8. Vector Iota Instruction +def WriteVMIotV : SchedWrite; +// 15.9. Vector Element Index Instruction +def WriteVMIdxV : SchedWrite; + +// 16. Vector Permutation Instructions +// 16.1. 
Integer Scalar Move Instructions +def WriteVIMovVX : SchedWrite; +def WriteVIMovXV : SchedWrite; +// 16.2. Floating-Point Scalar Move Instructions +def WriteVFMovVF : SchedWrite; +def WriteVFMovFV : SchedWrite; +// 16.3. Vector Slide Instructions +def WriteVISlideX : SchedWrite; +def WriteVISlideI : SchedWrite; +def WriteVISlide1X : SchedWrite; +def WriteVFSlide1F : SchedWrite; +// 16.4. Vector Register Gather Instructions +def WriteVGatherV : SchedWrite; +def WriteVGatherX : SchedWrite; +def WriteVGatherI : SchedWrite; +// 16.5. Vector Compress Instruction +def WriteVCompressV : SchedWrite; +// 16.6. Whole Vector Register Move +def WriteVMov1V : SchedWrite; +def WriteVMov2V : SchedWrite; +def WriteVMov4V : SchedWrite; +def WriteVMov8V : SchedWrite; + +//===----------------------------------------------------------------------===// +/// Define scheduler resources associated with use operands. + +// 7. Vector Loads and Stores +def ReadVLDX : SchedRead; +def ReadVSTX : SchedRead; +// 7.4. Vector Unit-Stride Instructions +def ReadVSTE8V : SchedRead; +def ReadVSTE16V : SchedRead; +def ReadVSTE32V : SchedRead; +def ReadVSTE64V : SchedRead; +// 7.4.1. Vector Unit-Strided Mask +def ReadVSTM : SchedRead; +// 7.5. Vector Strided Instructions +def ReadVLDSX : SchedRead; +def ReadVSTSX : SchedRead; +def ReadVSTS8V : SchedRead; +def ReadVSTS16V : SchedRead; +def ReadVSTS32V : SchedRead; +def ReadVSTS64V : SchedRead; +// 7.6. 
Vector Indexed Instructions +def ReadVLDUXV : SchedRead; +def ReadVLDOXV : SchedRead; +def ReadVSTUX8 : SchedRead; +def ReadVSTUX16 : SchedRead; +def ReadVSTUX32 : SchedRead; +def ReadVSTUX64 : SchedRead; +def ReadVSTUXV : SchedRead; +def ReadVSTUX8V : SchedRead; +def ReadVSTUX16V : SchedRead; +def ReadVSTUX32V : SchedRead; +def ReadVSTUX64V : SchedRead; +def ReadVSTOX8 : SchedRead; +def ReadVSTOX16 : SchedRead; +def ReadVSTOX32 : SchedRead; +def ReadVSTOX64 : SchedRead; +def ReadVSTOXV : SchedRead; +def ReadVSTOX8V : SchedRead; +def ReadVSTOX16V : SchedRead; +def ReadVSTOX32V : SchedRead; +def ReadVSTOX64V : SchedRead; +// 7.9. Vector Whole Register Instructions +def ReadVST1R : SchedRead; +def ReadVST2R : SchedRead; +def ReadVST4R : SchedRead; +def ReadVST8R : SchedRead; + +// 11. Vector Integer Arithmetic Instructions +// 11.1. Vector Single-Width Integer Add and Subtract +// 11.5. Vector Bitwise Logical Instructions +def ReadVIALUV : SchedRead; +def ReadVIALUX : SchedRead; +// 11.2. Vector Widening Integer Add/Subtract +def ReadVIWALUV : SchedRead; +def ReadVIWALUX : SchedRead; +// 11.3. Vector Integer Extension +def ReadVExtV : SchedRead; +// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions +def ReadVIALUCV : SchedRead; +def ReadVIALUCX : SchedRead; +// 11.6. Vector Single-Width Bit Shift Instructions +def ReadVShiftV : SchedRead; +def ReadVShiftX : SchedRead; +// 11.7. Vector Narrowing Integer Right Shift Instructions +def ReadVNShiftV : SchedRead; +def ReadVNShiftX : SchedRead; +// 11.8. Vector Integer Comparison Instructions +// 11.9. Vector Integer Min/Max Instructions +def ReadVICmpV : SchedRead; +def ReadVICmpX : SchedRead; +// 11.10. Vector Single-Width Integer Multiply Instructions +def ReadVIMulV : SchedRead; +def ReadVIMulX : SchedRead; +// 11.11. Vector Integer Divide Instructions +def ReadVIDivV : SchedRead; +def ReadVIDivX : SchedRead; +// 11.12. 
Vector Widening Integer Multiply Instructions +def ReadVIWMulV : SchedRead; +def ReadVIWMulX : SchedRead; +// 11.13. Vector Single-Width Integer Multiply-Add Instructions +def ReadVIMulAddV : SchedRead; +def ReadVIMulAddX : SchedRead; +// 11.14. Vector Widening Integer Multiply-Add Instructions +def ReadVIWMulAddV : SchedRead; +def ReadVIWMulAddX : SchedRead; +// 11.15. Vector Integer Merge Instructions +def ReadVIMergeV : SchedRead; +def ReadVIMergeX : SchedRead; +// 11.16. Vector Integer Move Instructions +def ReadVIMovV : SchedRead; +def ReadVIMovX : SchedRead; + +// 12. Vector Fixed-Point Arithmetic Instructions +// 12.1. Vector Single-Width Saturating Add and Subtract +def ReadVSALUV : SchedRead; +def ReadVSALUX : SchedRead; +// 12.2. Vector Single-Width Averaging Add and Subtract +def ReadVAALUV : SchedRead; +def ReadVAALUX : SchedRead; +// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +def ReadVSMulV : SchedRead; +def ReadVSMulX : SchedRead; +// 12.4. Vector Single-Width Scaling Shift Instructions +def ReadVSShiftV : SchedRead; +def ReadVSShiftX : SchedRead; +// 12.5. Vector Narrowing Fixed-Point Clip Instructions +def ReadVNClipV : SchedRead; +def ReadVNClipX : SchedRead; + +// 13. Vector Floating-Point Instructions +// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions +def ReadVFALUV : SchedRead; +def ReadVFALUF : SchedRead; +// 13.3. Vector Widening Floating-Point Add/Subtract Instructions +def ReadVFWALUV : SchedRead; +def ReadVFWALUF : SchedRead; +// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +def ReadVFMulV : SchedRead; +def ReadVFMulF : SchedRead; +def ReadVFDivV : SchedRead; +def ReadVFDivF : SchedRead; +// 13.5. Vector Widening Floating-Point Multiply +def ReadVFWMulV : SchedRead; +def ReadVFWMulF : SchedRead; +// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +def ReadVFMulAddV : SchedRead; +def ReadVFMulAddF : SchedRead; +// 13.7. 
Vector Widening Floating-Point Fused Multiply-Add Instructions +def ReadVFWMulAddV : SchedRead; +def ReadVFWMulAddF : SchedRead; +// 13.8. Vector Floating-Point Square-Root Instruction +def ReadVFSqrtV : SchedRead; +// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction +// 13.10. Vector Floating-Point Reciprocal Estimate Instruction +def ReadVFRecpV : SchedRead; +// 13.11. Vector Floating-Point MIN/MAX Instructions +// 13.13. Vector Floating-Point Compare Instructions +def ReadVFCmpV : SchedRead; +def ReadVFCmpF : SchedRead; +// 13.12. Vector Floating-Point Sign-Injection Instructions +def ReadVFSgnjV : SchedRead; +def ReadVFSgnjF : SchedRead; +// 13.14. Vector Floating-Point Classify Instruction +def ReadVFClassV : SchedRead; +// 13.15. Vector Floating-Point Merge Instruction +def ReadVFMergeV : SchedRead; +def ReadVFMergeF : SchedRead; +// 13.16. Vector Floating-Point Move Instruction +def ReadVFMovF : SchedRead; +// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions +def ReadVFCvtIToFV : SchedRead; +def ReadVFCvtFToIV : SchedRead; +// 13.18. Widening Floating-Point/Integer Type-Convert Instructions +def ReadVFWCvtIToFV : SchedRead; +def ReadVFWCvtFToIV : SchedRead; +def ReadVFWCvtFToFV : SchedRead; +// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions +def ReadVFNCvtIToFV : SchedRead; +def ReadVFNCvtFToIV : SchedRead; +def ReadVFNCvtFToFV : SchedRead; + +// 14. Vector Reduction Operations +// 14.1. Vector Single-Width Integer Reduction Instructions +def ReadVIRedV : SchedRead; +def ReadVIRedV0 : SchedRead; +// 14.2. Vector Widening Integer Reduction Instructions +def ReadVIWRedV : SchedRead; +def ReadVIWRedV0 : SchedRead; +// 14.3. Vector Single-Width Floating-Point Reduction Instructions +def ReadVFRedV : SchedRead; +def ReadVFRedV0 : SchedRead; +def ReadVFRedOV : SchedRead; +def ReadVFRedOV0 : SchedRead; +// 14.4. 
Vector Widening Floating-Point Reduction Instructions +def ReadVFWRedV : SchedRead; +def ReadVFWRedV0 : SchedRead; +def ReadVFWRedOV : SchedRead; +def ReadVFWRedOV0 : SchedRead; + +// 15. Vector Mask Instructions +// 15.1. Vector Mask-Register Logical Instructions +def ReadVMALUV : SchedRead; +// 15.2. Vector Mask Population Count +def ReadVMPopV : SchedRead; +// 15.3. Vector Find-First-Set Mask Bit +def ReadVMFFSV : SchedRead; +// 15.4. Vector Set-Before-First Mask Bit +// 15.5. Vector Set-Including-First Mask Bit +// 15.6. Vector Set-only-First Mask Bit +def ReadVMSFSV : SchedRead; +// 15.8. Vector Iota Instruction +def ReadVMIotV : SchedRead; + +// 16. Vector Permutation Instructions +// 16.1. Integer Scalar Move Instructions +def ReadVIMovVX : SchedRead; +def ReadVIMovXV : SchedRead; +def ReadVIMovXX : SchedRead; +// 16.2. Floating-Point Scalar Move Instructions +def ReadVFMovVF : SchedRead; +def ReadVFMovFV : SchedRead; +def ReadVFMovFX : SchedRead; +// 16.3. Vector Slide Instructions +def ReadVISlideV : SchedRead; +def ReadVISlideX : SchedRead; +def ReadVFSlideV : SchedRead; +def ReadVFSlideF : SchedRead; +// 16.4. Vector Register Gather Instructions +def ReadVGatherV : SchedRead; +def ReadVGatherX : SchedRead; +// 16.5. Vector Compress Instruction +def ReadVCompressV : SchedRead; +// 16.6. Whole Vector Register Move +def ReadVMov1V : SchedRead; +def ReadVMov2V : SchedRead; +def ReadVMov4V : SchedRead; +def ReadVMov8V : SchedRead; + +// Others +def ReadVMask : SchedRead; + +//===----------------------------------------------------------------------===// +/// Define default scheduler resources for V. + +multiclass UnsupportedSchedV { +let Unsupported = true in { + +// 7. 
Vector Loads and Stores +def : WriteRes<WriteVLDE8, []>; +def : WriteRes<WriteVLDE16, []>; +def : WriteRes<WriteVLDE32, []>; +def : WriteRes<WriteVLDE64, []>; +def : WriteRes<WriteVSTE8, []>; +def : WriteRes<WriteVSTE16, []>; +def : WriteRes<WriteVSTE32, []>; +def : WriteRes<WriteVSTE64, []>; +def : WriteRes<WriteVLDM, []>; +def : WriteRes<WriteVSTM, []>; +def : WriteRes<WriteVLDS8, []>; +def : WriteRes<WriteVLDS16, []>; +def : WriteRes<WriteVLDS32, []>; +def : WriteRes<WriteVLDS64, []>; +def : WriteRes<WriteVSTS8, []>; +def : WriteRes<WriteVSTS16, []>; +def : WriteRes<WriteVSTS32, []>; +def : WriteRes<WriteVSTS64, []>; +def : WriteRes<WriteVLDUX8, []>; +def : WriteRes<WriteVLDUX16, []>; +def : WriteRes<WriteVLDUX32, []>; +def : WriteRes<WriteVLDUX64, []>; +def : WriteRes<WriteVLDOX8, []>; +def : WriteRes<WriteVLDOX16, []>; +def : WriteRes<WriteVLDOX32, []>; +def : WriteRes<WriteVLDOX64, []>; +def : WriteRes<WriteVSTUX8, []>; +def : WriteRes<WriteVSTUX16, []>; +def : WriteRes<WriteVSTUX32, []>; +def : WriteRes<WriteVSTUX64, []>; +def : WriteRes<WriteVSTOX8, []>; +def : WriteRes<WriteVSTOX16, []>; +def : WriteRes<WriteVSTOX32, []>; +def : WriteRes<WriteVSTOX64, []>; +def : WriteRes<WriteVLDFF8, []>; +def : WriteRes<WriteVLDFF16, []>; +def : WriteRes<WriteVLDFF32, []>; +def : WriteRes<WriteVLDFF64, []>; +def : WriteRes<WriteVLD1R8, []>; +def : WriteRes<WriteVLD1R16, []>; +def : WriteRes<WriteVLD1R32, []>; +def : WriteRes<WriteVLD1R64, []>; +def : WriteRes<WriteVLD2R8, []>; +def : WriteRes<WriteVLD2R16, []>; +def : WriteRes<WriteVLD2R32, []>; +def : WriteRes<WriteVLD2R64, []>; +def : WriteRes<WriteVLD4R8, []>; +def : WriteRes<WriteVLD4R16, []>; +def : WriteRes<WriteVLD4R32, []>; +def : WriteRes<WriteVLD4R64, []>; +def : WriteRes<WriteVLD8R8, []>; +def : WriteRes<WriteVLD8R16, []>; +def : WriteRes<WriteVLD8R32, []>; +def : WriteRes<WriteVLD8R64, []>; +def : WriteRes<WriteVST1R, []>; +def : WriteRes<WriteVST2R, []>; +def : WriteRes<WriteVST4R, []>; +def : 
WriteRes<WriteVST8R, []>; + +// 12. Vector Integer Arithmetic Instructions +def : WriteRes<WriteVIALUV, []>; +def : WriteRes<WriteVIALUX, []>; +def : WriteRes<WriteVIALUI, []>; +def : WriteRes<WriteVIWALUV, []>; +def : WriteRes<WriteVIWALUX, []>; +def : WriteRes<WriteVIWALUI, []>; +def : WriteRes<WriteVExtV, []>; +def : WriteRes<WriteVICALUV, []>; +def : WriteRes<WriteVICALUX, []>; +def : WriteRes<WriteVICALUI, []>; +def : WriteRes<WriteVShiftV, []>; +def : WriteRes<WriteVShiftX, []>; +def : WriteRes<WriteVShiftI, []>; +def : WriteRes<WriteVNShiftV, []>; +def : WriteRes<WriteVNShiftX, []>; +def : WriteRes<WriteVNShiftI, []>; +def : WriteRes<WriteVICmpV, []>; +def : WriteRes<WriteVICmpX, []>; +def : WriteRes<WriteVICmpI, []>; +def : WriteRes<WriteVIMulV, []>; +def : WriteRes<WriteVIMulX, []>; +def : WriteRes<WriteVIDivV, []>; +def : WriteRes<WriteVIDivX, []>; +def : WriteRes<WriteVIWMulV, []>; +def : WriteRes<WriteVIWMulX, []>; +def : WriteRes<WriteVIMulAddV, []>; +def : WriteRes<WriteVIMulAddX, []>; +def : WriteRes<WriteVIWMulAddV, []>; +def : WriteRes<WriteVIWMulAddX, []>; +def : WriteRes<WriteVIMergeV, []>; +def : WriteRes<WriteVIMergeX, []>; +def : WriteRes<WriteVIMergeI, []>; +def : WriteRes<WriteVIMovV, []>; +def : WriteRes<WriteVIMovX, []>; +def : WriteRes<WriteVIMovI, []>; + +// 13. Vector Fixed-Point Arithmetic Instructions +def : WriteRes<WriteVSALUV, []>; +def : WriteRes<WriteVSALUX, []>; +def : WriteRes<WriteVSALUI, []>; +def : WriteRes<WriteVAALUV, []>; +def : WriteRes<WriteVAALUX, []>; +def : WriteRes<WriteVSMulV, []>; +def : WriteRes<WriteVSMulX, []>; +def : WriteRes<WriteVSShiftV, []>; +def : WriteRes<WriteVSShiftX, []>; +def : WriteRes<WriteVSShiftI, []>; +def : WriteRes<WriteVNClipV, []>; +def : WriteRes<WriteVNClipX, []>; +def : WriteRes<WriteVNClipI, []>; + +// 14. 
Vector Floating-Point Instructions +def : WriteRes<WriteVFALUV, []>; +def : WriteRes<WriteVFALUF, []>; +def : WriteRes<WriteVFWALUV, []>; +def : WriteRes<WriteVFWALUF, []>; +def : WriteRes<WriteVFMulV, []>; +def : WriteRes<WriteVFMulF, []>; +def : WriteRes<WriteVFDivV, []>; +def : WriteRes<WriteVFDivF, []>; +def : WriteRes<WriteVFWMulV, []>; +def : WriteRes<WriteVFWMulF, []>; +def : WriteRes<WriteVFMulAddV, []>; +def : WriteRes<WriteVFMulAddF, []>; +def : WriteRes<WriteVFWMulAddV, []>; +def : WriteRes<WriteVFWMulAddF, []>; +def : WriteRes<WriteVFSqrtV, []>; +def : WriteRes<WriteVFRecpV, []>; +def : WriteRes<WriteVFCmpV, []>; +def : WriteRes<WriteVFCmpF, []>; +def : WriteRes<WriteVFSgnjV, []>; +def : WriteRes<WriteVFSgnjF, []>; +def : WriteRes<WriteVFClassV, []>; +def : WriteRes<WriteVFMergeV, []>; +def : WriteRes<WriteVFMovV, []>; +def : WriteRes<WriteVFCvtIToFV, []>; +def : WriteRes<WriteVFCvtFToIV, []>; +def : WriteRes<WriteVFCvtFToFV, []>; +def : WriteRes<WriteVFWCvtIToFV, []>; +def : WriteRes<WriteVFWCvtFToIV, []>; +def : WriteRes<WriteVFWCvtFToFV, []>; +def : WriteRes<WriteVFNCvtIToFV, []>; +def : WriteRes<WriteVFNCvtFToIV, []>; +def : WriteRes<WriteVFNCvtFToFV, []>; + +// 15. Vector Reduction Operations +def : WriteRes<WriteVIRedV, []>; +def : WriteRes<WriteVIWRedV, []>; +def : WriteRes<WriteVFRedV, []>; +def : WriteRes<WriteVFRedOV, []>; +def : WriteRes<WriteVFWRedV, []>; +def : WriteRes<WriteVFWRedOV, []>; + +// 16. Vector Mask Instructions +def : WriteRes<WriteVMALUV, []>; +def : WriteRes<WriteVMPopV, []>; +def : WriteRes<WriteVMFFSV, []>; +def : WriteRes<WriteVMSFSV, []>; +def : WriteRes<WriteVMIotV, []>; +def : WriteRes<WriteVMIdxV, []>; + +// 17. 
Vector Permutation Instructions +def : WriteRes<WriteVIMovVX, []>; +def : WriteRes<WriteVIMovXV, []>; +def : WriteRes<WriteVFMovVF, []>; +def : WriteRes<WriteVFMovFV, []>; +def : WriteRes<WriteVISlideX, []>; +def : WriteRes<WriteVISlideI, []>; +def : WriteRes<WriteVISlide1X, []>; +def : WriteRes<WriteVFSlide1F, []>; +def : WriteRes<WriteVGatherV, []>; +def : WriteRes<WriteVGatherX, []>; +def : WriteRes<WriteVGatherI, []>; +def : WriteRes<WriteVCompressV, []>; +def : WriteRes<WriteVMov1V, []>; +def : WriteRes<WriteVMov2V, []>; +def : WriteRes<WriteVMov4V, []>; +def : WriteRes<WriteVMov8V, []>; + +// 7. Vector Loads and Stores +def : ReadAdvance<ReadVLDX, 0>; +def : ReadAdvance<ReadVSTX, 0>; +def : ReadAdvance<ReadVSTE8V, 0>; +def : ReadAdvance<ReadVSTE16V, 0>; +def : ReadAdvance<ReadVSTE32V, 0>; +def : ReadAdvance<ReadVSTE64V, 0>; +def : ReadAdvance<ReadVSTM, 0>; +def : ReadAdvance<ReadVLDSX, 0>; +def : ReadAdvance<ReadVSTSX, 0>; +def : ReadAdvance<ReadVSTS8V, 0>; +def : ReadAdvance<ReadVSTS16V, 0>; +def : ReadAdvance<ReadVSTS32V, 0>; +def : ReadAdvance<ReadVSTS64V, 0>; +def : ReadAdvance<ReadVLDUXV, 0>; +def : ReadAdvance<ReadVLDOXV, 0>; +def : ReadAdvance<ReadVSTUXV, 0>; +def : ReadAdvance<ReadVSTUX8, 0>; +def : ReadAdvance<ReadVSTUX16, 0>; +def : ReadAdvance<ReadVSTUX32, 0>; +def : ReadAdvance<ReadVSTUX64, 0>; +def : ReadAdvance<ReadVSTUX8V, 0>; +def : ReadAdvance<ReadVSTUX16V, 0>; +def : ReadAdvance<ReadVSTUX32V, 0>; +def : ReadAdvance<ReadVSTUX64V, 0>; +def : ReadAdvance<ReadVSTOX8, 0>; +def : ReadAdvance<ReadVSTOX16, 0>; +def : ReadAdvance<ReadVSTOX32, 0>; +def : ReadAdvance<ReadVSTOX64, 0>; +def : ReadAdvance<ReadVSTOXV, 0>; +def : ReadAdvance<ReadVSTOX8V, 0>; +def : ReadAdvance<ReadVSTOX16V, 0>; +def : ReadAdvance<ReadVSTOX32V, 0>; +def : ReadAdvance<ReadVSTOX64V, 0>; +def : ReadAdvance<ReadVST1R, 0>; +def : ReadAdvance<ReadVST2R, 0>; +def : ReadAdvance<ReadVST4R, 0>; +def : ReadAdvance<ReadVST8R, 0>; + +// 12. 
Vector Integer Arithmetic Instructions +def : ReadAdvance<ReadVIALUV, 0>; +def : ReadAdvance<ReadVIALUX, 0>; +def : ReadAdvance<ReadVIWALUV, 0>; +def : ReadAdvance<ReadVIWALUX, 0>; +def : ReadAdvance<ReadVExtV, 0>; +def : ReadAdvance<ReadVIALUCV, 0>; +def : ReadAdvance<ReadVIALUCX, 0>; +def : ReadAdvance<ReadVShiftV, 0>; +def : ReadAdvance<ReadVShiftX, 0>; +def : ReadAdvance<ReadVNShiftV, 0>; +def : ReadAdvance<ReadVNShiftX, 0>; +def : ReadAdvance<ReadVICmpV, 0>; +def : ReadAdvance<ReadVICmpX, 0>; +def : ReadAdvance<ReadVIMulV, 0>; +def : ReadAdvance<ReadVIMulX, 0>; +def : ReadAdvance<ReadVIDivV, 0>; +def : ReadAdvance<ReadVIDivX, 0>; +def : ReadAdvance<ReadVIWMulV, 0>; +def : ReadAdvance<ReadVIWMulX, 0>; +def : ReadAdvance<ReadVIMulAddV, 0>; +def : ReadAdvance<ReadVIMulAddX, 0>; +def : ReadAdvance<ReadVIWMulAddV, 0>; +def : ReadAdvance<ReadVIWMulAddX, 0>; +def : ReadAdvance<ReadVIMergeV, 0>; +def : ReadAdvance<ReadVIMergeX, 0>; +def : ReadAdvance<ReadVIMovV, 0>; +def : ReadAdvance<ReadVIMovX, 0>; + +// 13. Vector Fixed-Point Arithmetic Instructions +def : ReadAdvance<ReadVSALUV, 0>; +def : ReadAdvance<ReadVSALUX, 0>; +def : ReadAdvance<ReadVAALUV, 0>; +def : ReadAdvance<ReadVAALUX, 0>; +def : ReadAdvance<ReadVSMulV, 0>; +def : ReadAdvance<ReadVSMulX, 0>; +def : ReadAdvance<ReadVSShiftV, 0>; +def : ReadAdvance<ReadVSShiftX, 0>; +def : ReadAdvance<ReadVNClipV, 0>; +def : ReadAdvance<ReadVNClipX, 0>; + +// 14. 
Vector Floating-Point Instructions +def : ReadAdvance<ReadVFALUV, 0>; +def : ReadAdvance<ReadVFALUF, 0>; +def : ReadAdvance<ReadVFWALUV, 0>; +def : ReadAdvance<ReadVFWALUF, 0>; +def : ReadAdvance<ReadVFMulV, 0>; +def : ReadAdvance<ReadVFMulF, 0>; +def : ReadAdvance<ReadVFDivV, 0>; +def : ReadAdvance<ReadVFDivF, 0>; +def : ReadAdvance<ReadVFWMulV, 0>; +def : ReadAdvance<ReadVFWMulF, 0>; +def : ReadAdvance<ReadVFMulAddV, 0>; +def : ReadAdvance<ReadVFMulAddF, 0>; +def : ReadAdvance<ReadVFWMulAddV, 0>; +def : ReadAdvance<ReadVFWMulAddF, 0>; +def : ReadAdvance<ReadVFSqrtV, 0>; +def : ReadAdvance<ReadVFRecpV, 0>; +def : ReadAdvance<ReadVFCmpV, 0>; +def : ReadAdvance<ReadVFCmpF, 0>; +def : ReadAdvance<ReadVFSgnjV, 0>; +def : ReadAdvance<ReadVFSgnjF, 0>; +def : ReadAdvance<ReadVFClassV, 0>; +def : ReadAdvance<ReadVFMergeV, 0>; +def : ReadAdvance<ReadVFMergeF, 0>; +def : ReadAdvance<ReadVFMovF, 0>; +def : ReadAdvance<ReadVFCvtIToFV, 0>; +def : ReadAdvance<ReadVFCvtFToIV, 0>; +def : ReadAdvance<ReadVFWCvtIToFV, 0>; +def : ReadAdvance<ReadVFWCvtFToIV, 0>; +def : ReadAdvance<ReadVFWCvtFToFV, 0>; +def : ReadAdvance<ReadVFNCvtIToFV, 0>; +def : ReadAdvance<ReadVFNCvtFToIV, 0>; +def : ReadAdvance<ReadVFNCvtFToFV, 0>; + +// 15. Vector Reduction Operations +def : ReadAdvance<ReadVIRedV, 0>; +def : ReadAdvance<ReadVIRedV0, 0>; +def : ReadAdvance<ReadVIWRedV, 0>; +def : ReadAdvance<ReadVIWRedV0, 0>; +def : ReadAdvance<ReadVFRedV, 0>; +def : ReadAdvance<ReadVFRedV0, 0>; +def : ReadAdvance<ReadVFRedOV, 0>; +def : ReadAdvance<ReadVFRedOV0, 0>; +def : ReadAdvance<ReadVFWRedV, 0>; +def : ReadAdvance<ReadVFWRedV0, 0>; +def : ReadAdvance<ReadVFWRedOV, 0>; +def : ReadAdvance<ReadVFWRedOV0, 0>; + +// 16. Vector Mask Instructions +def : ReadAdvance<ReadVMALUV, 0>; +def : ReadAdvance<ReadVMPopV, 0>; +def : ReadAdvance<ReadVMFFSV, 0>; +def : ReadAdvance<ReadVMSFSV, 0>; +def : ReadAdvance<ReadVMIotV, 0>; + +// 17. 
Vector Permutation Instructions +def : ReadAdvance<ReadVIMovVX, 0>; +def : ReadAdvance<ReadVIMovXV, 0>; +def : ReadAdvance<ReadVIMovXX, 0>; +def : ReadAdvance<ReadVFMovVF, 0>; +def : ReadAdvance<ReadVFMovFV, 0>; +def : ReadAdvance<ReadVFMovFX, 0>; +def : ReadAdvance<ReadVISlideV, 0>; +def : ReadAdvance<ReadVISlideX, 0>; +def : ReadAdvance<ReadVFSlideV, 0>; +def : ReadAdvance<ReadVFSlideF, 0>; +def : ReadAdvance<ReadVGatherV, 0>; +def : ReadAdvance<ReadVGatherX, 0>; +def : ReadAdvance<ReadVCompressV, 0>; +def : ReadAdvance<ReadVMov1V, 0>; +def : ReadAdvance<ReadVMov2V, 0>; +def : ReadAdvance<ReadVMov4V, 0>; +def : ReadAdvance<ReadVMov8V, 0>; + +// Others +def : ReadAdvance<ReadVMask, 0>; + +} // Unsupported +} // UnsupportedSchedV diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3a64b3460030..a69850896436 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6704,17 +6704,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { auto *MemIntr = cast<MemIntrinsicSDNode>(Op); SDValue Ptr = MemIntr->getBasePtr(); + // The source constant may be larger than the subvector broadcast, + // ensure we extract the correct subvector constants. 
if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) { Type *CstTy = Cst->getType(); unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); - if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0) + unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits(); + if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 || + (SizeInBits % SubVecSizeInBits) != 0) return false; - unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits(); - unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits; - unsigned NumSubVecs = SizeInBits / CstSizeInBits; + unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); + unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits; + unsigned NumSubVecs = SizeInBits / SubVecSizeInBits; APInt UndefSubElts(NumSubElts, 0); SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs, - APInt(SubEltSizeInBits, 0)); + APInt(CstEltSizeInBits, 0)); for (unsigned i = 0; i != NumSubElts; ++i) { if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i], UndefSubElts, i)) diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index e83e1e74ff52..ba00e7da81f9 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, Sched<[sched.Folded, sched.ReadAfterFold]>; +// BinOpRM - Instructions like "adc reg, reg, [mem]". +// There is an implicit register read at the end of the operand sequence. +class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + dag outlist, X86FoldableSchedWrite sched, list<dag> pattern> + : ITy<opcode, MRMSrcMem, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern>, + Sched<[sched.Folded, sched.ReadAfterFold, + // base, scale, index, offset, segment. 
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // implicit register read. + sched.ReadAfterFold]>; + // BinOpRM_F - Instructions like "cmp reg, [mem]". class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> @@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, // BinOpRM_RFF - Instructions like "adc reg, reg, [mem]". class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> - : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC, + : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC, [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2), EFLAGS))]>; @@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpMR<opcode, mnemonic, typeinfo, [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst), - (implicit EFLAGS)]>, Sched<[WriteALURMW]>; + (implicit EFLAGS)]>, Sched<[WriteALURMW, + // base, scale, index, offset, segment + ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, + WriteALU.ReadAfterFold]>; // reg // BinOpMR_RMW_FF - Instructions like "adc [mem], reg". class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMR<opcode, mnemonic, typeinfo, [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS), addr:$dst), - (implicit EFLAGS)]>, Sched<[WriteADCRMW]>; + (implicit EFLAGS)]>, Sched<[WriteADCRMW, + // base, scale, index, offset, segment + ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, + WriteALU.ReadAfterFold, // reg + WriteALU.ReadAfterFold]>; // EFLAGS // BinOpMR_F - Instructions like "cmp [mem], reg". 
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 762317425026..91b16ec66ee3 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/ValueHandle.h" @@ -250,10 +251,12 @@ Value *AA::getWithType(Value &V, Type &Ty) { return Constant::getNullValue(&Ty); if (C->getType()->isPointerTy() && Ty.isPointerTy()) return ConstantExpr::getPointerCast(C, &Ty); - if (C->getType()->isIntegerTy() && Ty.isIntegerTy()) - return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true); - if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy()) - return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true); + if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) { + if (C->getType()->isIntegerTy() && Ty.isIntegerTy()) + return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true); + if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy()) + return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true); + } } return nullptr; } @@ -1023,7 +1026,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred, while (!Worklist.empty()) { const Use *U = Worklist.pop_back_val(); - if (!Visited.insert(U).second) + if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second) continue; LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser() << "\n"); @@ -1925,49 +1928,85 @@ void Attributor::createShallowWrapper(Function &F) { NumFnShallowWrappersCreated++; } +bool Attributor::isInternalizable(Function &F) { + if (F.isDeclaration() || F.hasLocalLinkage() || + GlobalValue::isInterposableLinkage(F.getLinkage())) + 
return false; + return true; +} + Function *Attributor::internalizeFunction(Function &F, bool Force) { if (!AllowDeepWrapper && !Force) return nullptr; - if (F.isDeclaration() || F.hasLocalLinkage() || - GlobalValue::isInterposableLinkage(F.getLinkage())) + if (!isInternalizable(F)) return nullptr; - Module &M = *F.getParent(); - FunctionType *FnTy = F.getFunctionType(); - - // create a copy of the current function - Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), - F.getName() + ".internalized"); - ValueToValueMapTy VMap; - auto *NewFArgIt = Copied->arg_begin(); - for (auto &Arg : F.args()) { - auto ArgName = Arg.getName(); - NewFArgIt->setName(ArgName); - VMap[&Arg] = &(*NewFArgIt++); - } - SmallVector<ReturnInst *, 8> Returns; - - // Copy the body of the original function to the new one - CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly, - Returns); + SmallPtrSet<Function *, 2> FnSet = {&F}; + DenseMap<Function *, Function *> InternalizedFns; + internalizeFunctions(FnSet, InternalizedFns); - // Set the linakage and visibility late as CloneFunctionInto has some implicit - // requirements. - Copied->setVisibility(GlobalValue::DefaultVisibility); - Copied->setLinkage(GlobalValue::PrivateLinkage); + return InternalizedFns[&F]; +} - // Copy metadata - SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; - F.getAllMetadata(MDs); - for (auto MDIt : MDs) - if (!Copied->hasMetadata()) - Copied->addMetadata(MDIt.first, *MDIt.second); +bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, + DenseMap<Function *, Function *> &FnMap) { + for (Function *F : FnSet) + if (!Attributor::isInternalizable(*F)) + return false; - M.getFunctionList().insert(F.getIterator(), Copied); - F.replaceAllUsesWith(Copied); - Copied->setDSOLocal(true); + FnMap.clear(); + // Generate the internalized version of each function. 
+ for (Function *F : FnSet) { + Module &M = *F->getParent(); + FunctionType *FnTy = F->getFunctionType(); + + // Create a copy of the current function + Function *Copied = + Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(), + F->getName() + ".internalized"); + ValueToValueMapTy VMap; + auto *NewFArgIt = Copied->arg_begin(); + for (auto &Arg : F->args()) { + auto ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + } + SmallVector<ReturnInst *, 8> Returns; + + // Copy the body of the original function to the new one + CloneFunctionInto(Copied, F, VMap, + CloneFunctionChangeType::LocalChangesOnly, Returns); + + // Set the linakage and visibility late as CloneFunctionInto has some + // implicit requirements. + Copied->setVisibility(GlobalValue::DefaultVisibility); + Copied->setLinkage(GlobalValue::PrivateLinkage); + + // Copy metadata + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + F->getAllMetadata(MDs); + for (auto MDIt : MDs) + if (!Copied->hasMetadata()) + Copied->addMetadata(MDIt.first, *MDIt.second); + + M.getFunctionList().insert(F->getIterator(), Copied); + Copied->setDSOLocal(true); + FnMap[F] = Copied; + } + + // Replace all uses of the old function with the new internalized function + // unless the caller is a function that was just internalized. 
+ for (Function *F : FnSet) { + auto &InternalizedFn = FnMap[F]; + auto IsNotInternalized = [&](Use &U) -> bool { + if (auto *CB = dyn_cast<CallBase>(U.getUser())) + return !FnMap.lookup(CB->getCaller()); + return false; + }; + F->replaceUsesWithIf(InternalizedFn, IsNotInternalized); + } - return Copied; + return true; } bool Attributor::isValidFunctionSignatureRewrite( diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 98ce286d5139..3529923a9082 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1149,19 +1149,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { return true; }; + /// Helper struct, will support ranges eventually. + struct OffsetInfo { + int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown; + + bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; } + }; + /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { using namespace AA::PointerInfo; State S = getState(); ChangeStatus Changed = ChangeStatus::UNCHANGED; Value &AssociatedValue = getAssociatedValue(); - struct OffsetInfo { - int64_t Offset = 0; - }; const DataLayout &DL = A.getDataLayout(); DenseMap<Value *, OffsetInfo> OffsetInfoMap; - OffsetInfoMap[&AssociatedValue] = {}; + OffsetInfoMap[&AssociatedValue] = OffsetInfo{0}; auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI, bool &Follow) { @@ -1219,8 +1223,48 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Follow = true; return true; } - if (isa<CastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr)) + if (isa<CastInst>(Usr) || isa<SelectInst>(Usr)) return HandlePassthroughUser(Usr, PtrOI, Follow); + + // For PHIs we need to take care of the recurrence explicitly as the value + // might change while we iterate through a loop. For now, we give up if + // the PHI is not invariant. 
+ if (isa<PHINode>(Usr)) { + // Check if the PHI is invariant (so far). + OffsetInfo &UsrOI = OffsetInfoMap[Usr]; + if (UsrOI == PtrOI) + return true; + + // Check if the PHI operand has already an unknown offset as we can't + // improve on that anymore. + if (PtrOI.Offset == OffsetAndSize::Unknown) { + UsrOI = PtrOI; + Follow = true; + return true; + } + + // Check if the PHI operand is not dependent on the PHI itself. + APInt Offset(DL.getIndexTypeSizeInBits(AssociatedValue.getType()), 0); + if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true)) { + if (Offset != PtrOI.Offset) { + LLVM_DEBUG(dbgs() + << "[AAPointerInfo] PHI operand pointer offset mismatch " + << *CurPtr << " in " << *Usr << "\n"); + return false; + } + return HandlePassthroughUser(Usr, PtrOI, Follow); + } + + // TODO: Approximate in case we know the direction of the recurrence. + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " + << *CurPtr << " in " << *Usr << "\n"); + UsrOI = PtrOI; + UsrOI.Offset = OffsetAndSize::Unknown; + Follow = true; + return true; + } + if (auto *LoadI = dyn_cast<LoadInst>(Usr)) return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, AccessKind::AK_READ, PtrOI.Offset, Changed, diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index b80349352719..d6b97915ede6 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4176,28 +4176,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { ORE.emit([&]() { OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F); return ORA << "Could not internalize function. " - << "Some optimizations may not be possible."; + << "Some optimizations may not be possible. [OMP140]"; }); }; // Create internal copies of each function if this is a kernel Module. This // allows iterprocedural passes to see every call edge. 
- DenseSet<const Function *> InternalizedFuncs; - if (isOpenMPDevice(M)) + DenseMap<Function *, Function *> InternalizedMap; + if (isOpenMPDevice(M)) { + SmallPtrSet<Function *, 16> InternalizeFns; for (Function &F : M) if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && !DisableInternalization) { - if (Attributor::internalizeFunction(F, /* Force */ true)) { - InternalizedFuncs.insert(&F); + if (Attributor::isInternalizable(F)) { + InternalizeFns.insert(&F); } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { EmitRemark(F); } } + Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); + } + // Look at every function in the Module unless it was internalized. SmallVector<Function *, 16> SCC; for (Function &F : M) - if (!F.isDeclaration() && !InternalizedFuncs.contains(&F)) + if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) SCC.push_back(&F); if (SCC.empty()) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2b0ef0c5f2cc..c5e14ebf3ae3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5158,6 +5158,83 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { if (!isa<Constant>(Op1) && Op1Min == Op1Max) return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min)); + // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a + // min/max canonical compare with some other compare. That could lead to + // conflict with select canonicalization and infinite looping. + // FIXME: This constraint may go away if min/max intrinsics are canonical. 
+ auto isMinMaxCmp = [&](Instruction &Cmp) { + if (!Cmp.hasOneUse()) + return false; + Value *A, *B; + SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor; + if (!SelectPatternResult::isMinOrMax(SPF)) + return false; + return match(Op0, m_MaxOrMin(m_Value(), m_Value())) || + match(Op1, m_MaxOrMin(m_Value(), m_Value())); + }; + if (!isMinMaxCmp(I)) { + switch (Pred) { + default: + break; + case ICmpInst::ICMP_ULT: { + if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + // A <u C -> A == C-1 if min(A)+1 == C + if (*CmpC == Op0Min + 1) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC - 1)); + // X <u C --> X == 0, if the number of zero bits in the bottom of X + // exceeds the log2 of C. + if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2()) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + Constant::getNullValue(Op1->getType())); + } + break; + } + case ICmpInst::ICMP_UGT: { + if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + // A >u C -> A == C+1 if max(a)-1 == C + if (*CmpC == Op0Max - 1) + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC + 1)); + // X >u C --> X != 0, if the number of zero bits in the bottom of X + // exceeds the log2 of C. 
+ if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits()) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, + Constant::getNullValue(Op1->getType())); + } + break; + } + case ICmpInst::ICMP_SLT: { + if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC - 1)); + } + break; + } + case ICmpInst::ICMP_SGT: { + if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) + return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); + const APInt *CmpC; + if (match(Op1, m_APInt(CmpC))) { + if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + ConstantInt::get(Op1->getType(), *CmpC + 1)); + } + break; + } + } + } + // Based on the range information we know about the LHS, see if we can // simplify this comparison. For example, (x&4) < 8 is always true. switch (Pred) { @@ -5219,21 +5296,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - // A <u C -> A == C-1 if min(A)+1 == C - if (*CmpC == Op0Min + 1) - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC - 1)); - // X <u C --> X == 0, if the number of zero bits in the bottom of X - // exceeds the log2 of C. 
- if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2()) - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Constant::getNullValue(Op1->getType())); - } break; } case ICmpInst::ICMP_UGT: { @@ -5241,21 +5303,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - // A >u C -> A == C+1 if max(a)-1 == C - if (*CmpC == Op0Max - 1) - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC + 1)); - // X >u C --> X != 0, if the number of zero bits in the bottom of X - // exceeds the log2 of C. - if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits()) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, - Constant::getNullValue(Op1->getType())); - } break; } case ICmpInst::ICMP_SLT: { @@ -5263,14 +5310,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC - 1)); - } break; } case ICmpInst::ICMP_SGT: { @@ -5278,14 +5317,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) { return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) return 
replaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); - if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) - return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); - const APInt *CmpC; - if (match(Op1, m_APInt(CmpC))) { - if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - ConstantInt::get(Op1->getType(), *CmpC + 1)); - } break; } case ICmpInst::ICMP_SGE: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index a8474e27383d..80abc775299a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -261,8 +261,8 @@ private: bool PointerReplacer::collectUsers(Instruction &I) { for (auto U : I.users()) { - Instruction *Inst = cast<Instruction>(&*U); - if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + auto *Inst = cast<Instruction>(&*U); + if (auto *Load = dyn_cast<LoadInst>(Inst)) { if (Load->isVolatile()) return false; Worklist.insert(Load); @@ -270,7 +270,9 @@ bool PointerReplacer::collectUsers(Instruction &I) { Worklist.insert(Inst); if (!collectUsers(*Inst)) return false; - } else if (isa<MemTransferInst>(Inst)) { + } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) { + if (MI->isVolatile()) + return false; Worklist.insert(Inst); } else if (Inst->isLifetimeStartOrEnd()) { continue; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index ce2b913dba61..5bbc3c87ca4f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3230,7 +3230,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *Mask; if (match(TrueVal, m_Zero()) && match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask), - m_CombineOr(m_Undef(), m_Zero())))) { + 
m_CombineOr(m_Undef(), m_Zero()))) && + (CondVal->getType() == Mask->getType())) { // We can remove the select by ensuring the load zeros all lanes the // select would have. We determine this by proving there is no overlap // between the load and select masks. diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b585818af595..404852f1dd4d 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1981,6 +1981,9 @@ class LSRInstance { /// IV users that belong to profitable IVChains. SmallPtrSet<Use*, MaxChains> IVIncSet; + /// Induction variables that were generated and inserted by the SCEV Expander. + SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs; + void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); @@ -2085,6 +2088,9 @@ public: TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU); bool getChanged() const { return Changed; } + const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const { + return ScalarEvolutionIVs; + } void print_factors_and_types(raw_ostream &OS) const; void print_fixups(raw_ostream &OS) const; @@ -5589,6 +5595,11 @@ void LSRInstance::ImplementSolution( GenerateIVChain(Chain, Rewriter, DeadInsts); Changed = true; } + + for (const WeakVH &IV : Rewriter.getInsertedIVs()) + if (IV && dyn_cast<Instruction>(&*IV)->getParent()) + ScalarEvolutionIVs.push_back(IV); + // Clean up after ourselves. This must be done before deleting any // instructions. 
Rewriter.clear(); @@ -5859,87 +5870,399 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MemorySSAWrapperPass>(); } -using EqualValues = SmallVector<std::tuple<WeakVH, int64_t>, 4>; -using EqualValuesMap = - DenseMap<DbgValueInst *, SmallVector<std::pair<unsigned, EqualValues>>>; -using LocationMap = - DenseMap<DbgValueInst *, std::pair<DIExpression *, Metadata *>>; +struct SCEVDbgValueBuilder { + SCEVDbgValueBuilder() = default; + SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { + Values = Base.Values; + Expr = Base.Expr; + } + + /// The DIExpression as we translate the SCEV. + SmallVector<uint64_t, 6> Expr; + /// The location ops of the DIExpression. + SmallVector<llvm::ValueAsMetadata *, 2> Values; + + void pushOperator(uint64_t Op) { Expr.push_back(Op); } + void pushUInt(uint64_t Operand) { Expr.push_back(Operand); } + + /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value + /// in the set of values referenced by the expression. + void pushValue(llvm::Value *V) { + Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg); + auto *It = + std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V)); + unsigned ArgIndex = 0; + if (It != Values.end()) { + ArgIndex = std::distance(Values.begin(), It); + } else { + ArgIndex = Values.size(); + Values.push_back(llvm::ValueAsMetadata::get(V)); + } + Expr.push_back(ArgIndex); + } + + void pushValue(const SCEVUnknown *U) { + llvm::Value *V = cast<SCEVUnknown>(U)->getValue(); + pushValue(V); + } + + bool pushConst(const SCEVConstant *C) { + if (C->getAPInt().getMinSignedBits() > 64) + return false; + Expr.push_back(llvm::dwarf::DW_OP_consts); + Expr.push_back(C->getAPInt().getSExtValue()); + return true; + } + + /// Several SCEV types are sequences of the same arithmetic operator applied + /// to constants and values that may be extended or truncated. 
+ bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr, + uint64_t DwarfOp) { + assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) && + "Expected arithmetic SCEV type"); + bool Success = true; + unsigned EmitOperator = 0; + for (auto &Op : CommExpr->operands()) { + Success &= pushSCEV(Op); + + if (EmitOperator >= 1) + pushOperator(DwarfOp); + ++EmitOperator; + } + return Success; + } + + // TODO: Identify and omit noop casts. + bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) { + const llvm::SCEV *Inner = C->getOperand(0); + const llvm::Type *Type = C->getType(); + uint64_t ToWidth = Type->getIntegerBitWidth(); + bool Success = pushSCEV(Inner); + uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth, + IsSigned ? llvm::dwarf::DW_ATE_signed + : llvm::dwarf::DW_ATE_unsigned}; + for (const auto &Op : CastOps) + pushOperator(Op); + return Success; + } + + // TODO: MinMax - although these haven't been encountered in the test suite. + bool pushSCEV(const llvm::SCEV *S) { + bool Success = true; + if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) { + Success &= pushConst(StartInt); + + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (!U->getValue()) + return false; + pushValue(U->getValue()); + + } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) { + Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul); + + } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + Success &= pushSCEV(UDiv->getLHS()); + Success &= pushSCEV(UDiv->getRHS()); + pushOperator(llvm::dwarf::DW_OP_div); + + } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) { + // Assert if a new and unknown SCEVCastEXpr type is encountered. 
+ assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) || + isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) && + "Unexpected cast type in SCEV."); + Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast))); + + } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) { + Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus); + + } else if (isa<SCEVAddRecExpr>(S)) { + // Nested SCEVAddRecExpr are generated by nested loops and are currently + // unsupported. + return false; + + } else { + return false; + } + return Success; + } + + void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) { + // Re-state assumption that this dbg.value is not variadic. Any remaining + // opcodes in its expression operate on a single value already on the + // expression stack. Prepend our operations, which will re-compute and + // place that value on the expression stack. + assert(!DI.hasArgList()); + auto *NewExpr = + DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true); + DI.setExpression(NewExpr); + + auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(Values); + DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef)); + } + + /// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the + /// location op index 0. + void setShortFinalExpression(llvm::DbgValueInst &DI, + const DIExpression *OldExpr) { + assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) && + "Expected DW_OP_llvm_arg and 0."); + DI.replaceVariableLocationOp( + 0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0])); + + // See setFinalExpression: prepend our opcodes on the start of any old + // expression opcodes. 
+ assert(!DI.hasArgList()); + llvm::SmallVector<uint64_t, 6> FinalExpr(Expr.begin() + 2, Expr.end()); + auto *NewExpr = + DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true); + DI.setExpression(NewExpr); + } + + /// Once the IV and variable SCEV translation is complete, write it to the + /// source DVI. + void applyExprToDbgValue(llvm::DbgValueInst &DI, + const DIExpression *OldExpr) { + assert(!Expr.empty() && "Unexpected empty expression."); + // Emit a simpler form if only a single location is referenced. + if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && + Expr[1] == 0) { + setShortFinalExpression(DI, OldExpr); + } else { + setFinalExpression(DI, OldExpr); + } + } + + /// Return true if the combination of arithmetic operator and underlying + /// SCEV constant value is an identity function. + bool isIdentityFunction(uint64_t Op, const SCEV *S) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (C->getAPInt().getMinSignedBits() > 64) + return false; + int64_t I = C->getAPInt().getSExtValue(); + switch (Op) { + case llvm::dwarf::DW_OP_plus: + case llvm::dwarf::DW_OP_minus: + return I == 0; + case llvm::dwarf::DW_OP_mul: + case llvm::dwarf::DW_OP_div: + return I == 1; + } + } + return false; + } + + /// Convert a SCEV of a value to a DIExpression that is pushed onto the + /// builder's expression stack. The stack should already contain an + /// expression for the iteration count, so that it can be multiplied by + /// the stride and added to the start. + /// Components of the expression are omitted if they are an identity function. + /// Chain (non-affine) SCEVs are not supported. + bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) { + assert(SAR.isAffine() && "Expected affine SCEV"); + // TODO: Is this check needed? 
+ if (isa<SCEVAddRecExpr>(SAR.getStart())) + return false; + + const SCEV *Start = SAR.getStart(); + const SCEV *Stride = SAR.getStepRecurrence(SE); + + // Skip pushing arithmetic noops. + if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) { + if (!pushSCEV(Stride)) + return false; + pushOperator(llvm::dwarf::DW_OP_mul); + } + if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) { + if (!pushSCEV(Start)) + return false; + pushOperator(llvm::dwarf::DW_OP_plus); + } + return true; + } + + /// Convert a SCEV of a value to a DIExpression that is pushed onto the + /// builder's expression stack. The stack should already contain an + /// expression for the iteration count, so that it can be multiplied by + /// the stride and added to the start. + /// Components of the expression are omitted if they are an identity function. + bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR, + ScalarEvolution &SE) { + assert(SAR.isAffine() && "Expected affine SCEV"); + if (isa<SCEVAddRecExpr>(SAR.getStart())) { + LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: " + << SAR << '\n'); + return false; + } + const SCEV *Start = SAR.getStart(); + const SCEV *Stride = SAR.getStepRecurrence(SE); + + // Skip pushing arithmetic noops. + if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) { + if (!pushSCEV(Start)) + return false; + pushOperator(llvm::dwarf::DW_OP_minus); + } + if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) { + if (!pushSCEV(Stride)) + return false; + pushOperator(llvm::dwarf::DW_OP_div); + } + return true; + } +}; + +struct DVIRecoveryRec { + DbgValueInst *DVI; + DIExpression *Expr; + Metadata *LocationOp; + const llvm::SCEV *SCEV; +}; + +static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI, + const SCEVDbgValueBuilder &IterationCount, + ScalarEvolution &SE) { + // LSR may add locations to previously single location-op DVIs which + // are currently not supported. 
+ if (CachedDVI.DVI->getNumVariableLocationOps() != 1) + return false; + + // SCEVs for SSA values are most frquently of the form + // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..). + // This is because %a is a PHI node that is not the IV. However, these + // SCEVs have not been observed to result in debuginfo-lossy optimisations, + // so its not expected this point will be reached. + if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV)) + return false; + + LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: " + << *CachedDVI.SCEV << '\n'); + + const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV); + if (!Rec->isAffine()) + return false; + + // Initialise a new builder with the iteration count expression. In + // combination with the value's SCEV this enables recovery. + SCEVDbgValueBuilder RecoverValue(IterationCount); + if (!RecoverValue.SCEVToValueExpr(*Rec, SE)) + return false; + + LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n'); + RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr); + LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n'); + return true; +} + +static bool +DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, + llvm::PHINode *LSRInductionVar, + SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) { + if (DVIToUpdate.empty()) + return false; -static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE, - EqualValuesMap &DbgValueToEqualSet, - LocationMap &DbgValueToLocation) { + const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar); + assert(SCEVInductionVar && + "Anticipated a SCEV for the post-LSR induction variable"); + + bool Changed = false; + if (const SCEVAddRecExpr *IVAddRec = + dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) { + if (!IVAddRec->isAffine()) + return false; + + SCEVDbgValueBuilder IterCountExpr; + IterCountExpr.pushValue(LSRInductionVar); + if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE)) + return false; + + LLVM_DEBUG(dbgs() << 
"scev-salvage: IV SCEV: " << *SCEVInductionVar + << '\n'); + + // Needn't salvage if the location op hasn't been undef'd by LSR. + for (auto &DVIRec : DVIToUpdate) { + if (!DVIRec.DVI->isUndef()) + continue; + + // Some DVIs that were single location-op when cached are now multi-op, + // due to LSR optimisations. However, multi-op salvaging is not yet + // supported by SCEV salvaging. But, we can attempt a salvage by restoring + // the pre-LSR single-op expression. + if (DVIRec.DVI->hasArgList()) { + if (!DVIRec.DVI->getVariableLocationOp(0)) + continue; + llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType(); + DVIRec.DVI->setRawLocation( + llvm::ValueAsMetadata::get(UndefValue::get(Ty))); + DVIRec.DVI->setExpression(DVIRec.Expr); + } + + Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE); + } + } + return Changed; +} + +/// Identify and cache salvageable DVI locations and expressions along with the +/// corresponding SCEV(s). Also ensure that the DVI is not deleted before +static void +DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, + SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs, + SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) { for (auto &B : L->getBlocks()) { for (auto &I : *B) { auto DVI = dyn_cast<DbgValueInst>(&I); if (!DVI) continue; - for (unsigned Idx = 0; Idx < DVI->getNumVariableLocationOps(); ++Idx) { - // TODO: We can duplicate results if the same arg appears more than - // once. 
- Value *V = DVI->getVariableLocationOp(Idx); - if (!V || !SE.isSCEVable(V->getType())) - continue; - auto DbgValueSCEV = SE.getSCEV(V); - EqualValues EqSet; - for (PHINode &Phi : L->getHeader()->phis()) { - if (V->getType() != Phi.getType()) - continue; - if (!SE.isSCEVable(Phi.getType())) - continue; - auto PhiSCEV = SE.getSCEV(&Phi); - Optional<APInt> Offset = - SE.computeConstantDifference(DbgValueSCEV, PhiSCEV); - if (Offset && Offset->getMinSignedBits() <= 64) - EqSet.emplace_back( - std::make_tuple(&Phi, Offset.getValue().getSExtValue())); - } - DbgValueToEqualSet[DVI].push_back({Idx, std::move(EqSet)}); - // If we fall back to using this raw location, at least one location op - // must be dead. A DIArgList will automatically undef arguments when - // they become unavailable, but a ValueAsMetadata will not; since we - // know the value should be undef, we use the undef value directly here. - Metadata *RawLocation = - DVI->hasArgList() ? DVI->getRawLocation() - : ValueAsMetadata::get(UndefValue::get( - DVI->getVariableLocationOp(0)->getType())); - DbgValueToLocation[DVI] = {DVI->getExpression(), RawLocation}; - } + + if (DVI->hasArgList()) + continue; + + if (!DVI->getVariableLocationOp(0) || + !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType())) + continue; + + SalvageableDVISCEVs.push_back( + {DVI, DVI->getExpression(), DVI->getRawLocation(), + SE.getSCEV(DVI->getVariableLocationOp(0))}); + DVIHandles.insert(DVI); } } } -static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet, - LocationMap &DbgValueToLocation) { - for (auto A : DbgValueToEqualSet) { - auto *DVI = A.first; - // Only update those that are now undef. - if (!DVI->isUndef()) +/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback +/// any PHi from the loop header is usable, but may have less chance of +/// surviving subsequent transforms. 
+static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE, + const LSRInstance &LSR) { + // For now, just pick the first IV generated and inserted. Ideally pick an IV + // that is unlikely to be optimised away by subsequent transforms. + for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) { + if (!IV) continue; - // The dbg.value may have had its value or expression changed during LSR by - // a failed salvage attempt; refresh them from the map. - auto *DbgDIExpr = DbgValueToLocation[DVI].first; - DVI->setRawLocation(DbgValueToLocation[DVI].second); - DVI->setExpression(DbgDIExpr); - assert(DVI->isUndef() && "dbg.value with non-undef location should not " - "have been modified by LSR."); - for (auto IdxEV : A.second) { - unsigned Idx = IdxEV.first; - for (auto EV : IdxEV.second) { - auto EVHandle = std::get<WeakVH>(EV); - if (!EVHandle) - continue; - int64_t Offset = std::get<int64_t>(EV); - DVI->replaceVariableLocationOp(Idx, EVHandle); - if (Offset) { - SmallVector<uint64_t, 8> Ops; - DIExpression::appendOffset(Ops, Offset); - DbgDIExpr = DIExpression::appendOpsToArg(DbgDIExpr, Ops, Idx, true); - } - DVI->setExpression(DbgDIExpr); - break; - } + + assert(isa<PHINode>(&*IV) && "Expected PhI node."); + if (SE.isSCEVable((*IV).getType())) { + PHINode *Phi = dyn_cast<PHINode>(&*IV); + LLVM_DEBUG(dbgs() << "scev-salvage: IV : " << *IV + << "with SCEV: " << *SE.getSCEV(Phi) << "\n"); + return Phi; + } + } + + for (PHINode &Phi : L.getHeader()->phis()) { + if (!SE.isSCEVable(Phi.getType())) + continue; + + const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi); + if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV)) + if (!Rec->isAffine()) + continue; + + LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi + << " with SCEV: " << *PhiSCEV << "\n"); + return &Phi; + } + return nullptr; } static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, @@ -5948,20 +6271,21 @@ static bool ReduceLoopStrength(Loop *L, 
IVUsers &IU, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA) { + // Debug preservation - before we start removing anything identify which DVI + // meet the salvageable criteria and store their DIExpression and SCEVs. + SmallVector<DVIRecoveryRec, 2> SalvageableDVI; + SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles; + DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles); + bool Changed = false; std::unique_ptr<MemorySSAUpdater> MSSAU; if (MSSA) MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); // Run the main LSR transformation. - Changed |= - LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged(); - - // Debug preservation - before we start removing anything create equivalence - // sets for the llvm.dbg.value intrinsics. - EqualValuesMap DbgValueToEqualSet; - LocationMap DbgValueToLocation; - DbgGatherEqualValues(L, SE, DbgValueToEqualSet, DbgValueToLocation); + const LSRInstance &Reducer = + LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()); + Changed |= Reducer.getChanged(); // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); @@ -5981,8 +6305,22 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, } } - DbgApplyEqualValues(DbgValueToEqualSet, DbgValueToLocation); + if (SalvageableDVI.empty()) + return Changed; + + // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with + // expressions composed using the derived iteration count. + // TODO: Allow for multiple IV references for nested AddRecSCEVs + for (auto &L : LI) { + if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer)) + DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI); + else { + LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. 
An IV " + "could not be identified.\n"); + } + } + DVIHandles.clear(); return Changed; } diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 5ec01454e5b2..fe160d5415bd 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2811,10 +2811,11 @@ private: if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) return false; + // Length must be in range for FixedVectorType. auto *C = cast<ConstantInt>(II.getLength()); - if (C->getBitWidth() > 64) + const uint64_t Len = C->getLimitedValue(); + if (Len > std::numeric_limits<unsigned>::max()) return false; - const auto Len = C->getZExtValue(); auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext()); auto *SrcTy = FixedVectorType::get(Int8Ty, Len); return canConvertValue(DL, SrcTy, AllocaTy) && diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 91280762aaa7..bd2b6fafdf2e 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -23,6 +24,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -566,10 +568,18 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, // to ensure we dominate all of our uses. Always insert right before the // relevant instruction (terminator, assume), so that we insert in proper // order in the case of multiple predicateinfo in the same block. 
+ // The number of named values is used to detect if a new declaration was + // added. If so, that declaration is tracked so that it can be removed when + // the analysis is done. The corner case were a new declaration results in + // a name clash and the old name being renamed is not considered as that + // represents an invalid module. if (isa<PredicateWithEdge>(ValInfo)) { IRBuilder<> B(getBranchTerminator(ValInfo)); + auto NumDecls = F.getParent()->getNumNamedValues(); Function *IF = Intrinsic::getDeclaration( F.getParent(), Intrinsic::ssa_copy, Op->getType()); + if (NumDecls != F.getParent()->getNumNamedValues()) + PI.CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++)); PI.PredicateMap.insert({PIC, ValInfo}); @@ -581,8 +591,11 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, // Insert the predicate directly after the assume. While it also holds // directly before it, assume(i1 true) is not a useful fact. IRBuilder<> B(PAssume->AssumeInst->getNextNode()); + auto NumDecls = F.getParent()->getNumNamedValues(); Function *IF = Intrinsic::getDeclaration( F.getParent(), Intrinsic::ssa_copy, Op->getType()); + if (NumDecls != F.getParent()->getNumNamedValues()) + PI.CreatedDeclarations.insert(IF); CallInst *PIC = B.CreateCall(IF, Op); PI.PredicateMap.insert({PIC, ValInfo}); Result.Def = PIC; @@ -761,6 +774,23 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT, Builder.buildPredicateInfo(); } +// Remove all declarations we created . The PredicateInfo consumers are +// responsible for remove the ssa_copy calls created. +PredicateInfo::~PredicateInfo() { + // Collect function pointers in set first, as SmallSet uses a SmallVector + // internally and we have to remove the asserting value handles first. 
+ SmallPtrSet<Function *, 20> FunctionPtrs; + for (auto &F : CreatedDeclarations) + FunctionPtrs.insert(&*F); + CreatedDeclarations.clear(); + + for (Function *F : FunctionPtrs) { + assert(F->user_begin() == F->user_end() && + "PredicateInfo consumer did not remove all SSA copies."); + F->eraseFromParent(); + } +} + Optional<PredicateConstraint> PredicateBase::getConstraint() const { switch (Type) { case PT_Assume: @@ -827,6 +857,19 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AssumptionCacheTracker>(); } +// Replace ssa_copy calls created by PredicateInfo with their operand. +static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { + for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) { + const auto *PI = PredInfo.getPredicateInfoFor(&Inst); + auto *II = dyn_cast<IntrinsicInst>(&Inst); + if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy) + continue; + + Inst.replaceAllUsesWith(II->getOperand(0)); + Inst.eraseFromParent(); + } +} + bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); @@ -834,6 +877,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) { PredInfo->print(dbgs()); if (VerifyPredicateInfo) PredInfo->verifyPredicateInfo(); + + replaceCreatedSSACopys(*PredInfo, F); return false; } @@ -845,6 +890,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F, auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC); PredInfo->print(OS); + replaceCreatedSSACopys(*PredInfo, F); return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 5af1c37e6197..3978e1e29825 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp 
@@ -1393,9 +1393,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // can ensure that IVIncrement dominates the current uses. PostIncLoops = SavedPostIncLoops; - // Remember this PHI, even in post-inc mode. + // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most + // effective when we are able to use an IV inserted here, so record it. InsertedValues.insert(PN); - + InsertedIVs.push_back(PN); return PN; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f24ae6b100d5..671bc6b5212b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5433,6 +5433,21 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { // lane 0 demanded or b) are uses which demand only lane 0 of their operand. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::sideeffect: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::assume: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + if (TheLoop->hasLoopInvariantOperands(&I)) + addToWorklistIfAllowed(&I); + break; + default: + break; + } + } + // If there's no pointer operand, there's nothing to do. auto *Ptr = getLoadStorePointerOperand(&I); if (!Ptr) @@ -8916,6 +8931,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range); + // Even if the instruction is not marked as uniform, there are certain + // intrinsic calls that can be effectively treated as such, so we check for + // them here. Conservatively, we only do this for scalable vectors, since + // for fixed-width VFs we can always fall back on full scalarization. 
+ if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) { + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + case Intrinsic::assume: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // For scalable vectors if one of the operands is variant then we still + // want to mark as uniform, which will generate one instruction for just + // the first lane of the vector. We can't scalarize the call in the same + // way as for fixed-width vectors because we don't know how many lanes + // there are. + // + // The reasons for doing it this way for scalable vectors are: + // 1. For the assume intrinsic generating the instruction for the first + // lane is still be better than not generating any at all. For + // example, the input may be a splat across all lanes. + // 2. For the lifetime start/end intrinsics the pointer operand only + // does anything useful when the input comes from a stack object, + // which suggests it should always be uniform. For non-stack objects + // the effect is to poison the object, which still allows us to + // remove the call. 
+ IsUniform = true; + break; + default: + break; + } + } + auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); setRecipe(I, Recipe); diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp index 9a949761bb75..4ecc3015529c 100644 --- a/llvm/tools/llvm-mca/Views/TimelineView.cpp +++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp @@ -145,10 +145,11 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, double AverageTime1, AverageTime2, AverageTime3; AverageTime1 = - (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions; - AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions; - AverageTime3 = - (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions; + (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions; + AverageTime2 = + (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions; + AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) / + CumulativeExecutions; OS << Executions; OS.PadToColumn(13); @@ -157,18 +158,18 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, if (!PrintingTotals) tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, BufferSize); - OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); + OS << format("%.1f", floor(AverageTime1 + 0.5) / 10); OS.PadToColumn(20); if (!PrintingTotals) tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, BufferSize); - OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); + OS << format("%.1f", floor(AverageTime2 + 0.5) / 10); OS.PadToColumn(27); if (!PrintingTotals) tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, CumulativeExecutions, getSubTargetInfo().getSchedModel().MicroOpBufferSize); - OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); + OS << format("%.1f", floor(AverageTime3 + 0.5) / 10); if (OS.has_colors()) 
OS.resetColor(); diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index 162fb38e1eed..dd3e7688d33f 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -344,6 +344,13 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash, // link node as successor of all nodes in the prev_set if any npredecessors += __kmp_depnode_link_successor(gtid, thread, task, node, prev_set); + if (dep_barrier) { + // clean last_out and prev_set if any; don't touch last_set + __kmp_node_deref(thread, last_out); + info->last_out = NULL; + __kmp_depnode_list_free(thread, prev_set); + info->prev_set = NULL; + } } else { // last_set is of different dep kind, make it prev_set // link node as successor of all nodes in the last_set npredecessors += @@ -353,13 +360,21 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash, info->last_out = NULL; // clean prev_set if any __kmp_depnode_list_free(thread, prev_set); - // move last_set to prev_set, new last_set will be allocated - info->prev_set = last_set; + if (!dep_barrier) { + // move last_set to prev_set, new last_set will be allocated + info->prev_set = last_set; + } else { + info->prev_set = NULL; + info->last_flag = 0; + } info->last_set = NULL; } - info->last_flag = dep->flag; // store dep kind of the last_set - info->last_set = __kmp_add_node(thread, info->last_set, node); - + // for dep_barrier last_flag value should remain: + // 0 if last_set is empty, unchanged otherwise + if (!dep_barrier) { + info->last_flag = dep->flag; // store dep kind of the last_set + info->last_set = __kmp_add_node(thread, info->last_set, node); + } // check if we are processing MTX dependency if (dep->flag == KMP_DEP_MTX) { if (info->mtx_lock == NULL) { @@ -756,8 +771,6 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depnode_t node = {0}; __kmp_init_node(&node); - // the stack owns the node - 
__kmp_node_ref(&node); if (!__kmp_check_deps(gtid, &node, NULL, ¤t_task->td_dephash, DEP_BARRIER, ndeps, dep_list, ndeps_noalias, diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h index d1576dd5b791..73abf07018f3 100644 --- a/openmp/runtime/src/kmp_taskdeps.h +++ b/openmp/runtime/src/kmp_taskdeps.h @@ -23,8 +23,7 @@ static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) { return; kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1; - // TODO: temporarily disable assertion until the bug with dependences is fixed - // KMP_DEBUG_ASSERT(n >= 0); + KMP_DEBUG_ASSERT(n >= 0); if (n == 0) { KMP_ASSERT(node->dn.nrefs == 0); #if USE_FAST_MEMORY diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 6c3e2c95cb5a..55e9c307638a 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1441,6 +1441,7 @@ kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, if (__kmp_enable_hidden_helper) { auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags); input_flags.hidden_helper = TRUE; + input_flags.tiedness = TASK_UNTIED; } return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t, |