-rw-r--r--  clang/include/clang/Basic/BuiltinsAArch64.def | 3
-rw-r--r--  clang/include/clang/Basic/LangOptions.h | 6
-rw-r--r--  clang/include/clang/Driver/Options.td | 6
-rw-r--r--  clang/include/clang/Lex/PreprocessorOptions.h | 3
-rw-r--r--  clang/include/clang/Sema/Sema.h | 3
-rw-r--r--  clang/lib/AST/ASTContext.cpp | 20
-rw-r--r--  clang/lib/AST/Expr.cpp | 7
-rw-r--r--  clang/lib/Basic/LangOptions.cpp | 8
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp | 23
-rw-r--r--  clang/lib/CodeGen/CGDeclCXX.cpp | 18
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.cpp | 2
-rw-r--r--  clang/lib/Driver/ToolChains/Clang.cpp | 38
-rw-r--r--  clang/lib/Driver/ToolChains/Hexagon.cpp | 32
-rw-r--r--  clang/lib/Driver/ToolChains/MinGW.cpp | 7
-rw-r--r--  clang/lib/Frontend/CompilerInvocation.cpp | 18
-rw-r--r--  clang/lib/Headers/intrin.h | 3
-rw-r--r--  clang/lib/Lex/PPMacroExpansion.cpp | 11
-rw-r--r--  clang/lib/Sema/SemaConcept.cpp | 17
-rw-r--r--  clang/lib/Sema/SemaDeclCXX.cpp | 5
-rw-r--r--  clang/lib/Sema/SemaTemplate.cpp | 9
-rw-r--r--  clang/lib/Sema/SemaTemplateInstantiate.cpp | 19
-rw-r--r--  compiler-rt/include/profile/InstrProfData.inc | 4
-rw-r--r--  compiler-rt/lib/profile/InstrProfilingBuffer.c | 2
-rw-r--r--  compiler-rt/lib/profile/InstrProfilingMerge.c | 11
-rw-r--r--  compiler-rt/lib/profile/InstrProfilingPlatformLinux.c | 19
-rw-r--r--  libcxx/include/__config | 11
-rw-r--r--  libcxx/include/ctime | 2
-rw-r--r--  libcxx/include/ios | 7
-rw-r--r--  lld/ELF/Config.h | 3
-rw-r--r--  lld/ELF/Driver.cpp | 55
-rw-r--r--  lld/ELF/LinkerScript.cpp | 36
-rw-r--r--  lld/ELF/LinkerScript.h | 2
-rw-r--r--  lld/ELF/Relocations.cpp | 7
-rw-r--r--  lld/ELF/ScriptParser.cpp | 9
-rw-r--r--  lld/ELF/SymbolTable.cpp | 118
-rw-r--r--  lld/ELF/SymbolTable.h | 10
-rw-r--r--  lld/ELF/Symbols.cpp | 3
-rw-r--r--  lld/docs/ReleaseNotes.rst | 148
-rw-r--r--  lldb/source/Commands/CommandObjectMemoryTag.cpp | 182
-rw-r--r--  lldb/source/Commands/Options.td | 8
-rw-r--r--  lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp | 28
-rw-r--r--  lldb/source/Symbol/TypeSystem.cpp | 90
-rw-r--r--  llvm/include/llvm/Analysis/ValueTracking.h | 4
-rw-r--r--  llvm/include/llvm/IR/Module.h | 3
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProf.h | 1
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfData.inc | 4
-rw-r--r--  llvm/include/llvm/Transforms/IPO/Attributor.h | 20
-rw-r--r--  llvm/include/llvm/Transforms/Utils/PredicateInfo.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h | 5
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 16
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 11
-rw-r--r--  llvm/lib/IR/ConstantFold.cpp | 241
-rw-r--r--  llvm/lib/IR/Module.cpp | 4
-rw-r--r--  llvm/lib/ProfileData/InstrProfReader.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 18
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 34
-rw-r--r--  llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2
-rw-r--r--  llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 18
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 1003
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedRocket.td | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVSchedule.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVScheduleV.td | 820
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 14
-rw-r--r--  llvm/lib/Target/X86/X86InstrArithmetic.td | 28
-rw-r--r--  llvm/lib/Transforms/IPO/Attributor.cpp | 117
-rw-r--r--  llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 54
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 16
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 123
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 8
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 492
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Utils/PredicateInfo.cpp | 46
-rw-r--r--  llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 5
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 46
-rw-r--r--  llvm/tools/llvm-mca/Views/TimelineView.cpp | 15
-rw-r--r--  openmp/runtime/src/kmp_taskdeps.cpp | 27
-rw-r--r--  openmp/runtime/src/kmp_taskdeps.h | 3
-rw-r--r--  openmp/runtime/src/kmp_tasking.cpp | 1
83 files changed, 3235 insertions, 1019 deletions
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 1dac5d2371d4..634bcaed20a6 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -243,6 +243,9 @@ TARGET_HEADER_BUILTIN(_ReadStatusReg, "LLii", "nh", "intrin.h", ALL_MS_LANGUAG
TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__mulh, "SLLiSLLiSLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
#undef BUILTIN
#undef LANGBUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 71cf0c65e692..b60b94a1ba08 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -354,6 +354,9 @@ public:
/// A list of all -fno-builtin-* function names (e.g., memset).
std::vector<std::string> NoBuiltinFuncs;
+ /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE().
+ std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap;
+
/// Triples of the OpenMP targets that the host code codegen should
/// take into account in order to generate accurate offloading descriptors.
std::vector<llvm::Triple> OMPTargetTriples;
@@ -460,6 +463,9 @@ public:
}
bool isSYCL() const { return SYCLIsDevice || SYCLIsHost; }
+
+ /// Remap path prefix according to -fmacro-prefix-path option.
+ void remapPathPrefix(SmallString<256> &Path) const;
};
/// Floating point control options
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index ab1a5487d9c0..a0cbcae0bdc3 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2825,10 +2825,10 @@ def fcoverage_prefix_map_EQ
HelpText<"remap file source paths in coverage mapping">;
def ffile_prefix_map_EQ
: Joined<["-"], "ffile-prefix-map=">, Group<f_Group>,
- HelpText<"remap file source paths in debug info and predefined preprocessor macros">;
+ HelpText<"remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE()">;
def fmacro_prefix_map_EQ
- : Joined<["-"], "fmacro-prefix-map=">, Group<Preprocessor_Group>, Flags<[CC1Option]>,
- HelpText<"remap file source paths in predefined preprocessor macros">;
+ : Joined<["-"], "fmacro-prefix-map=">, Group<f_Group>, Flags<[CC1Option]>,
+ HelpText<"remap file source paths in predefined preprocessor macros and __builtin_FILE()">;
defm force_dwarf_frame : BoolFOption<"force-dwarf-frame",
CodeGenOpts<"ForceDwarfFrameSection">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Always emit a debug frame section">, NegFlag<SetFalse>>;
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 99085b98fc7a..a7aabc3e1df2 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -199,9 +199,6 @@ public:
/// build it again.
std::shared_ptr<FailedModulesSet> FailedModules;
- /// A prefix map for __FILE__ and __BASE_FILE__.
- std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap;
-
/// Contains the currently active skipped range mappings for skipping excluded
/// conditional directives.
///
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 83a2d132bf6a..d8b2546b81a3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -7828,8 +7828,7 @@ public:
TemplateArgumentLoc &Arg,
SmallVectorImpl<TemplateArgument> &Converted);
- bool CheckTemplateArgument(TemplateTypeParmDecl *Param,
- TypeSourceInfo *Arg);
+ bool CheckTemplateArgument(TypeSourceInfo *Arg);
ExprResult CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
QualType InstantiatedParamType, Expr *Arg,
TemplateArgument &Converted,
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index e102a3ba508d..fdba204fbe7f 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -6066,9 +6066,11 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
NNS->getAsNamespaceAlias()->getNamespace()
->getOriginalNamespace());
+ // The difference between TypeSpec and TypeSpecWithTemplate is that the
+ // latter will have the 'template' keyword when printed.
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
- QualType T = getCanonicalType(QualType(NNS->getAsType(), 0));
+ const Type *T = getCanonicalType(NNS->getAsType());
// If we have some kind of dependent-named type (e.g., "typename T::type"),
// break it apart into its prefix and identifier, then reconsititute those
@@ -6078,14 +6080,16 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
// typedef typename T::type T1;
// typedef typename T1::type T2;
if (const auto *DNT = T->getAs<DependentNameType>())
- return NestedNameSpecifier::Create(*this, DNT->getQualifier(),
- const_cast<IdentifierInfo *>(DNT->getIdentifier()));
-
- // Otherwise, just canonicalize the type, and force it to be a TypeSpec.
- // FIXME: Why are TypeSpec and TypeSpecWithTemplate distinct in the
- // first place?
+ return NestedNameSpecifier::Create(
+ *this, DNT->getQualifier(),
+ const_cast<IdentifierInfo *>(DNT->getIdentifier()));
+ if (const auto *DTST = T->getAs<DependentTemplateSpecializationType>())
+ return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true,
+ const_cast<Type *>(T));
+
+ // TODO: Set 'Template' parameter to true for other template types.
return NestedNameSpecifier::Create(*this, nullptr, false,
- const_cast<Type *>(T.getTypePtr()));
+ const_cast<Type *>(T));
}
case NestedNameSpecifier::Global:
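The two dependent cases above arise in ordinary template code. A minimal illustration (type and member names invented) of which qualifier kinds are involved:

    template <typename T> struct S {
      // Qualifier 'T::' names a DependentNameType member: canonicalization
      // splits it into prefix + identifier, as in the DNT branch above.
      typedef typename T::type T1;
      // Qualifier 'T::template Tmpl<int>::' needs the 'template' keyword when
      // printed, which is what TypeSpecWithTemplate records (the DTST branch).
      typedef typename T::template Tmpl<int>::type T2;
    };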
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index e8b4aaa2b81e..11f10d4695fc 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2233,8 +2233,11 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
};
switch (getIdentKind()) {
- case SourceLocExpr::File:
- return MakeStringLiteral(PLoc.getFilename());
+ case SourceLocExpr::File: {
+ SmallString<256> Path(PLoc.getFilename());
+ Ctx.getLangOpts().remapPathPrefix(Path);
+ return MakeStringLiteral(Path);
+ }
case SourceLocExpr::Function: {
const Decl *CurDecl = dyn_cast_or_null<Decl>(Context);
return MakeStringLiteral(
diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index dc392d5352aa..bebf3178426f 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -11,6 +11,8 @@
//===----------------------------------------------------------------------===//
#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Path.h"
using namespace clang;
@@ -48,6 +50,12 @@ VersionTuple LangOptions::getOpenCLVersionTuple() const {
return VersionTuple(Ver / 100, (Ver % 100) / 10);
}
+void LangOptions::remapPathPrefix(SmallString<256> &Path) const {
+ for (const auto &Entry : MacroPrefixMap)
+ if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second))
+ break;
+}
+
FPOptions FPOptions::defaultWithoutTrailingStorage(const LangOptions &LO) {
FPOptions result(LO);
return result;
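Because the map is keyed with std::greater, lexicographically greater keys are visited first, so a longer prefix such as "/a/b" wins over "/a", and at most one replacement is applied per path. A hedged usage sketch, assuming a LangOptions object LangOpts populated from -fmacro-prefix-map=/home/user/project=/src (illustrative paths):

    llvm::SmallString<256> Path("/home/user/project/main.cpp");
    // MacroPrefixMap holds { "/home/user/project" -> "/src" }, so the first
    // matching entry rewrites the prefix and the loop breaks.
    LangOpts.remapPathPrefix(Path); // Path now reads "/src/main.cpp"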
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d9b2a5fe16be..1a02965b223e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9732,6 +9732,29 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F);
}
+ if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) {
+ llvm::Type *ResType = ConvertType(E->getType());
+ llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
+
+ bool IsSigned = BuiltinID == AArch64::BI__mulh;
+ Value *LHS =
+ Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
+ Value *RHS =
+ Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
+
+ Value *MulResult, *HigherBits;
+ if (IsSigned) {
+ MulResult = Builder.CreateNSWMul(LHS, RHS);
+ HigherBits = Builder.CreateAShr(MulResult, 64);
+ } else {
+ MulResult = Builder.CreateNUWMul(LHS, RHS);
+ HigherBits = Builder.CreateLShr(MulResult, 64);
+ }
+ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
+
+ return HigherBits;
+ }
+
// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
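The lowering above sign- or zero-extends both operands to i128, multiplies, and shifts the product right by 64 to keep the high half. A minimal C++ sketch of the same semantics (sketch names, not the real builtins), assuming a compiler providing __int128:

    #include <cstdint>
    // Illustrative equivalents of __mulh (signed) and __umulh (unsigned).
    inline int64_t mulh_sketch(int64_t a, int64_t b) {
      return (int64_t)(((__int128)a * b) >> 64);           // arithmetic shift
    }
    inline uint64_t umulh_sketch(uint64_t a, uint64_t b) {
      return (uint64_t)(((unsigned __int128)a * b) >> 64); // logical shift
    }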
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index d43fb99550a8..553fedebfe56 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -555,7 +555,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
PrioritizedCXXGlobalInits.size());
PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn));
} else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) ||
- getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) {
+ getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR ||
+ D->hasAttr<SelectAnyAttr>()) {
// C++ [basic.start.init]p2:
// Definitions of explicitly specialized class template static data
// members have ordered initialization. Other class template static data
@@ -568,17 +569,18 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
// group with the global being initialized. On most platforms, this is a
// minor startup time optimization. In the MS C++ ABI, there are no guard
// variables, so this COMDAT key is required for correctness.
- AddGlobalCtor(Fn, 65535, COMDATKey);
- if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) {
- // In The MS C++, MS add template static data member in the linker
- // drective.
- addUsedGlobal(COMDATKey);
- }
- } else if (D->hasAttr<SelectAnyAttr>()) {
+ //
// SelectAny globals will be comdat-folded. Put the initializer into a
// COMDAT group associated with the global, so the initializers get folded
// too.
+
AddGlobalCtor(Fn, 65535, COMDATKey);
+ if (COMDATKey && (getTriple().isOSBinFormatELF() ||
+ getTarget().getCXXABI().isMicrosoft())) {
+ // When COMDAT is used on ELF or in the MS C++ ABI, the key must be in
+ // llvm.used to prevent linker GC.
+ addUsedGlobal(COMDATKey);
+ }
} else {
I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash.
if (I == DelayedCXXInitPosition.end()) {
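The restructured branch means a __declspec(selectany) global with a dynamic initializer now gets its initializer emitted in a COMDAT group keyed to the variable, so when the linker folds duplicate definitions the initializers fold with them. A hedged illustration (requires MS extensions, e.g. clang-cl or -fms-extensions; compute() is an invented external):

    int compute(); // invented external function
    // The dynamic initializer for 'x' lives in a COMDAT keyed to 'x';
    // TUs that duplicate the definition fold down to one init call.
    __declspec(selectany) int x = compute();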
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 9b40b88ea3c9..49a1396b58e3 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -186,7 +186,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
!getModule().getSourceFileName().empty()) {
std::string Path = getModule().getSourceFileName();
// Check if a path substitution is needed from the MacroPrefixMap.
- for (const auto &Entry : PPO.MacroPrefixMap)
+ for (const auto &Entry : LangOpts.MacroPrefixMap)
if (Path.rfind(Entry.first, 0) != std::string::npos) {
Path = Entry.second + Path.substr(Entry.first.size());
break;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 1870bd81789c..4c8ba8cdcd29 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2637,7 +2637,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
- StringRef FPContract = "on";
+ StringRef FPContract = "";
bool StrictFPModel = false;
@@ -2662,7 +2662,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
ReciprocalMath = false;
SignedZeros = true;
// -fno_fast_math restores default denormal and fpcontract handling
- FPContract = "on";
+ FPContract = "";
DenormalFPMath = llvm::DenormalMode::getIEEE();
// FIXME: The target may have picked a non-IEEE default mode here based on
@@ -2682,18 +2682,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
// ffp-model= is a Driver option, it is entirely rewritten into more
// granular options before being passed into cc1.
// Use the gcc option in the switch below.
- if (!FPModel.empty() && !FPModel.equals(Val))
+ if (!FPModel.empty() && !FPModel.equals(Val)) {
D.Diag(clang::diag::warn_drv_overriding_flag_option)
<< Args.MakeArgString("-ffp-model=" + FPModel)
<< Args.MakeArgString("-ffp-model=" + Val);
+ FPContract = "";
+ }
if (Val.equals("fast")) {
optID = options::OPT_ffast_math;
FPModel = Val;
- FPContract = Val;
+ FPContract = "fast";
} else if (Val.equals("precise")) {
optID = options::OPT_ffp_contract;
FPModel = Val;
- FPContract = "on";
+ FPContract = "fast";
PreciseFPModel = true;
} else if (Val.equals("strict")) {
StrictFPModel = true;
@@ -2779,11 +2781,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
case options::OPT_ffp_contract: {
StringRef Val = A->getValue();
if (PreciseFPModel) {
- // When -ffp-model=precise is seen on the command line,
- // the boolean PreciseFPModel is set to true which indicates
- // "the current option is actually PreciseFPModel". The optID
- // is changed to OPT_ffp_contract and FPContract is set to "on".
- // the argument Val string is "precise": it shouldn't be checked.
+ // -ffp-model=precise enables ffp-contract=fast as a side effect;
+ // the FPContract value has already been set to a string literal,
+ // and the Val string isn't a pertinent value.
;
} else if (Val.equals("fast") || Val.equals("on") || Val.equals("off"))
FPContract = Val;
@@ -2881,17 +2881,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
// -fno_fast_math restores default denormal and fpcontract handling
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = llvm::DenormalMode::getIEEE();
- FPContract = "on";
+ FPContract = "";
break;
}
if (StrictFPModel) {
// If -ffp-model=strict has been specified on command line but
// subsequent options conflict then emit warning diagnostic.
- if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath &&
- SignedZeros && TrappingMath && RoundingFPMath &&
- DenormalFPMath == llvm::DenormalMode::getIEEE() &&
- DenormalFP32Math == llvm::DenormalMode::getIEEE() &&
- FPContract.equals("off"))
+ if (HonorINFs && HonorNaNs &&
+ !AssociativeMath && !ReciprocalMath &&
+ SignedZeros && TrappingMath && RoundingFPMath &&
+ (FPContract.equals("off") || FPContract.empty()) &&
+ DenormalFPMath == llvm::DenormalMode::getIEEE() &&
+ DenormalFP32Math == llvm::DenormalMode::getIEEE())
// OK: Current Arg doesn't conflict with -ffp-model=strict
;
else {
@@ -7690,8 +7691,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
assert(CurTC == nullptr && "Expected one dependence!");
CurTC = TC;
});
+ UB += C.addTempFile(
+ C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I])));
+ } else {
+ UB += CurTC->getInputFilename(Inputs[I]);
}
- UB += CurTC->getInputFilename(Inputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
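For reference, what the FPContract values ultimately permit: under -ffp-contract=fast (now also the effect of -ffp-model=precise), a multiply followed by an add may be fused into one instruction. A small example of code affected by the setting:

    // With -ffp-contract=fast this may compile to a single fused
    // multiply-add (fma), skipping the intermediate rounding of a*b;
    // with -ffp-contract=off the multiply and the add round separately.
    double mul_add(double a, double b, double c) { return a * b + c; }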
diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp
index 828bfdbb05a3..314d0efce441 100644
--- a/clang/lib/Driver/ToolChains/Hexagon.cpp
+++ b/clang/lib/Driver/ToolChains/Hexagon.cpp
@@ -588,21 +588,43 @@ void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs,
void HexagonToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
- if (DriverArgs.hasArg(options::OPT_nostdinc) ||
- DriverArgs.hasArg(options::OPT_nostdlibinc))
+ if (DriverArgs.hasArg(options::OPT_nostdinc))
return;
+ const bool IsELF = !getTriple().isMusl() && !getTriple().isOSLinux();
+ const bool IsLinuxMusl = getTriple().isMusl() && getTriple().isOSLinux();
+
const Driver &D = getDriver();
- if (!D.SysRoot.empty()) {
+ SmallString<128> ResourceDirInclude(D.ResourceDir);
+ if (!IsELF) {
+ llvm::sys::path::append(ResourceDirInclude, "include");
+ if (!DriverArgs.hasArg(options::OPT_nobuiltininc) &&
+ (!IsLinuxMusl || DriverArgs.hasArg(options::OPT_nostdlibinc)))
+ addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude);
+ }
+ if (DriverArgs.hasArg(options::OPT_nostdlibinc))
+ return;
+
+ const bool HasSysRoot = !D.SysRoot.empty();
+ if (HasSysRoot) {
SmallString<128> P(D.SysRoot);
- if (getTriple().isMusl())
+ if (IsLinuxMusl)
llvm::sys::path::append(P, "usr/include");
else
llvm::sys::path::append(P, "include");
+
addExternCSystemInclude(DriverArgs, CC1Args, P.str());
- return;
+ // LOCAL_INCLUDE_DIR
+ addSystemInclude(DriverArgs, CC1Args, P + "/usr/local/include");
+ // TOOL_INCLUDE_DIR
+ AddMultilibIncludeArgs(DriverArgs, CC1Args);
}
+ if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && IsLinuxMusl)
+ addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude);
+
+ if (HasSysRoot)
+ return;
std::string TargetDir = getHexagonTargetDir(D.getInstalledDir(),
D.PrefixDirs);
addExternCSystemInclude(DriverArgs, CC1Args, TargetDir + "/hexagon/include");
diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp
index 20efbdc237a8..7ba729f36bd8 100644
--- a/clang/lib/Driver/ToolChains/MinGW.cpp
+++ b/clang/lib/Driver/ToolChains/MinGW.cpp
@@ -136,10 +136,13 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
llvm_unreachable("Unsupported target architecture.");
}
- if (Args.hasArg(options::OPT_mwindows)) {
+ Arg *SubsysArg =
+ Args.getLastArg(options::OPT_mwindows, options::OPT_mconsole);
+ if (SubsysArg && SubsysArg->getOption().matches(options::OPT_mwindows)) {
CmdArgs.push_back("--subsystem");
CmdArgs.push_back("windows");
- } else if (Args.hasArg(options::OPT_mconsole)) {
+ } else if (SubsysArg &&
+ SubsysArg->getOption().matches(options::OPT_mconsole)) {
CmdArgs.push_back("--subsystem");
CmdArgs.push_back("console");
}
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 33e5f3e99c45..7025028bc94a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3528,6 +3528,9 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
GenerateArg(Args, OPT_fexperimental_relative_cxx_abi_vtables, SA);
else
GenerateArg(Args, OPT_fno_experimental_relative_cxx_abi_vtables, SA);
+
+ for (const auto &MP : Opts.MacroPrefixMap)
+ GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA);
}
bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
@@ -4037,6 +4040,12 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
options::OPT_fno_experimental_relative_cxx_abi_vtables,
TargetCXXABI::usesRelativeVTables(T));
+ for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) {
+ auto Split = StringRef(A).split('=');
+ Opts.MacroPrefixMap.insert(
+ {std::string(Split.first), std::string(Split.second)});
+ }
+
return Diags.getNumErrors() == NumErrorsBefore;
}
@@ -4109,9 +4118,6 @@ static void GeneratePreprocessorArgs(PreprocessorOptions &Opts,
for (const auto &D : Opts.DeserializedPCHDeclsToErrorOn)
GenerateArg(Args, OPT_error_on_deserialized_pch_decl, D, SA);
- for (const auto &MP : Opts.MacroPrefixMap)
- GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA);
-
if (Opts.PrecompiledPreambleBytes != std::make_pair(0u, false))
GenerateArg(Args, OPT_preamble_bytes_EQ,
Twine(Opts.PrecompiledPreambleBytes.first) + "," +
@@ -4180,12 +4186,6 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl))
Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue());
- for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) {
- auto Split = StringRef(A).split('=');
- Opts.MacroPrefixMap.insert(
- {std::string(Split.first), std::string(Split.second)});
- }
-
if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) {
StringRef Value(A->getValue());
size_t Comma = Value.find(',');
diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
index ff8eb8fca268..34ec79d6acbc 100644
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -574,6 +574,9 @@ void _WriteStatusReg(int, __int64);
unsigned short __cdecl _byteswap_ushort(unsigned short val);
unsigned long __cdecl _byteswap_ulong (unsigned long val);
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
+
+__int64 __mulh(__int64 __a, __int64 __b);
+unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b);
#endif
/*----------------------------------------------------------------------------*\
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 8728ac9e2166..d8ad9d845e7a 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1453,15 +1453,6 @@ static bool isTargetEnvironment(const TargetInfo &TI,
return TI.getTriple().getEnvironment() == Env.getEnvironment();
}
-static void remapMacroPath(
- SmallString<256> &Path,
- const std::map<std::string, std::string, std::greater<std::string>>
- &MacroPrefixMap) {
- for (const auto &Entry : MacroPrefixMap)
- if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second))
- break;
-}
-
/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1543,7 +1534,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
} else {
FN += PLoc.getFilename();
}
- remapMacroPath(FN, PPOpts->MacroPrefixMap);
+ getLangOpts().remapPathPrefix(FN);
Lexer::Stringify(FN);
OS << '"' << FN << '"';
}
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index f2c70d0a56ef..931c9e3e2738 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -742,22 +742,15 @@ Optional<NormalizedConstraint>
NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D,
ArrayRef<const Expr *> E) {
assert(E.size() != 0);
- auto First = fromConstraintExpr(S, D, E[0]);
- if (E.size() == 1)
- return First;
- auto Second = fromConstraintExpr(S, D, E[1]);
- if (!Second)
+ auto Conjunction = fromConstraintExpr(S, D, E[0]);
+ if (!Conjunction)
return None;
- llvm::Optional<NormalizedConstraint> Conjunction;
- Conjunction.emplace(S.Context, std::move(*First), std::move(*Second),
- CCK_Conjunction);
- for (unsigned I = 2; I < E.size(); ++I) {
+ for (unsigned I = 1; I < E.size(); ++I) {
auto Next = fromConstraintExpr(S, D, E[I]);
if (!Next)
- return llvm::Optional<NormalizedConstraint>{};
- NormalizedConstraint NewConjunction(S.Context, std::move(*Conjunction),
+ return None;
+ *Conjunction = NormalizedConstraint(S.Context, std::move(*Conjunction),
std::move(*Next), CCK_Conjunction);
- *Conjunction = std::move(NewConjunction);
}
return Conjunction;
}
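The rewritten loop is a plain left fold over the constraint expressions, with early exit as soon as any normalization fails. A generic sketch of the shape (all names invented; combine() stands in for constructing the CCK_Conjunction node, norm() for fromConstraintExpr):

    #include <optional>
    #include <vector>
    template <class T, class E, class Norm, class Combine>
    std::optional<T> foldConjunction(const std::vector<E> &exprs, Norm norm,
                                     Combine combine) {
      std::optional<T> acc = norm(exprs[0]); // caller guarantees non-empty
      if (!acc)
        return std::nullopt;
      for (std::size_t i = 1; i < exprs.size(); ++i) {
        std::optional<T> next = norm(exprs[i]);
        if (!next)
          return std::nullopt;
        acc = combine(std::move(*acc), std::move(*next)); // left-associative
      }
      return acc;
    }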
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 83c97626ff7e..da4f4f862095 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -12472,6 +12472,8 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
return false;
}
+ const NestedNameSpecifier *CNNS =
+ Context.getCanonicalNestedNameSpecifier(Qual);
for (LookupResult::iterator I = Prev.begin(), E = Prev.end(); I != E; ++I) {
NamedDecl *D = *I;
@@ -12497,8 +12499,7 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
// using decls differ if they name different scopes (but note that
// template instantiation can cause this check to trigger when it
// didn't before instantiation).
- if (Context.getCanonicalNestedNameSpecifier(Qual) !=
- Context.getCanonicalNestedNameSpecifier(DQual))
+ if (CNNS != Context.getCanonicalNestedNameSpecifier(DQual))
continue;
Diag(NameLoc, diag::err_using_decl_redeclaration) << SS.getRange();
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 175388198324..5d26f2d2c11a 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -1079,7 +1079,7 @@ NamedDecl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
return Param;
// Check the template argument itself.
- if (CheckTemplateArgument(Param, DefaultTInfo)) {
+ if (CheckTemplateArgument(DefaultTInfo)) {
Param->setInvalidDecl();
return Param;
}
@@ -5042,7 +5042,7 @@ bool Sema::CheckTemplateTypeArgument(TemplateTypeParmDecl *Param,
}
}
- if (CheckTemplateArgument(Param, TSI))
+ if (CheckTemplateArgument(TSI))
return true;
// Add the converted template type argument.
@@ -5661,7 +5661,7 @@ bool Sema::CheckTemplateArgumentList(
TemplateArgumentListInfo NewArgs = TemplateArgs;
// Make sure we get the template parameter list from the most
- // recentdeclaration, since that is the only one that has is guaranteed to
+ // recent declaration, since that is the only one that is guaranteed to
// have all the default template argument information.
TemplateParameterList *Params =
cast<TemplateDecl>(Template->getMostRecentDecl())
@@ -6208,8 +6208,7 @@ bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier(
///
/// This routine implements the semantics of C++ [temp.arg.type]. It
/// returns true if an error occurred, and false otherwise.
-bool Sema::CheckTemplateArgument(TemplateTypeParmDecl *Param,
- TypeSourceInfo *ArgInfo) {
+bool Sema::CheckTemplateArgument(TypeSourceInfo *ArgInfo) {
assert(ArgInfo && "invalid TypeSourceInfo");
QualType Arg = ArgInfo->getType();
SourceRange SR = ArgInfo->getTypeLoc().getSourceRange();
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index f18f77d3442a..74889aa3ca88 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1934,25 +1934,23 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
return Req;
Sema::SFINAETrap Trap(SemaRef);
- TemplateDeductionInfo Info(Req->getExpr()->getBeginLoc());
llvm::PointerUnion<Expr *, concepts::Requirement::SubstitutionDiagnostic *>
TransExpr;
if (Req->isExprSubstitutionFailure())
TransExpr = Req->getExprSubstitutionDiagnostic();
else {
- Sema::InstantiatingTemplate ExprInst(SemaRef, Req->getExpr()->getBeginLoc(),
- Req, Info,
- Req->getExpr()->getSourceRange());
+ Expr *E = Req->getExpr();
+ TemplateDeductionInfo Info(E->getBeginLoc());
+ Sema::InstantiatingTemplate ExprInst(SemaRef, E->getBeginLoc(), Req, Info,
+ E->getSourceRange());
if (ExprInst.isInvalid())
return nullptr;
- ExprResult TransExprRes = TransformExpr(Req->getExpr());
+ ExprResult TransExprRes = TransformExpr(E);
if (TransExprRes.isInvalid() || Trap.hasErrorOccurred())
- TransExpr = createSubstDiag(SemaRef, Info,
- [&] (llvm::raw_ostream& OS) {
- Req->getExpr()->printPretty(OS, nullptr,
- SemaRef.getPrintingPolicy());
- });
+ TransExpr = createSubstDiag(SemaRef, Info, [&](llvm::raw_ostream &OS) {
+ E->printPretty(OS, nullptr, SemaRef.getPrintingPolicy());
+ });
else
TransExpr = TransExprRes.get();
}
@@ -1966,6 +1964,7 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
else if (RetReq.isTypeConstraint()) {
TemplateParameterList *OrigTPL =
RetReq.getTypeConstraintTemplateParameterList();
+ TemplateDeductionInfo Info(OrigTPL->getTemplateLoc());
Sema::InstantiatingTemplate TPLInst(SemaRef, OrigTPL->getTemplateLoc(),
Req, Info, OrigTPL->getSourceRange());
if (TPLInst.isInvalid())
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 08a642469627..7d2097cfc297 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
@@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
-INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 6
+#define INSTR_PROF_RAW_VERSION 7
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 21fa7ba1ddd6..68b4f5cd6f52 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -116,7 +116,7 @@ uint64_t __llvm_profile_get_size_for_buffer_internal(
DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
&PaddingBytesAfterCounters, &PaddingBytesAfterNames);
- return sizeof(__llvm_profile_header) +
+ return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
(DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters +
(CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters +
NamesSize + PaddingBytesAfterNames;
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index 913228513259..16ebc2f8b2a9 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -22,6 +22,7 @@ void (*VPMergeHook)(ValueProfData *, __llvm_profile_data *);
COMPILER_RT_VISIBILITY
uint64_t lprofGetLoadModuleSignature() {
/* A very fast way to compute a module signature. */
+ uint64_t Version = __llvm_profile_get_version();
uint64_t CounterSize = (uint64_t)(__llvm_profile_end_counters() -
__llvm_profile_begin_counters());
uint64_t DataSize = __llvm_profile_get_data_size(__llvm_profile_begin_data(),
@@ -33,7 +34,7 @@ uint64_t lprofGetLoadModuleSignature() {
const __llvm_profile_data *FirstD = __llvm_profile_begin_data();
return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) +
- (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0);
+ (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version;
}
/* Returns 1 if profile is not structurally compatible. */
@@ -44,7 +45,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
__llvm_profile_header *Header = (__llvm_profile_header *)ProfileData;
__llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData;
SrcDataStart =
- (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
+ (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) +
+ Header->BinaryIdsSize);
SrcDataEnd = SrcDataStart + Header->DataSize;
if (ProfileSize < sizeof(__llvm_profile_header))
@@ -63,7 +65,7 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
Header->ValueKindLast != IPVK_Last)
return 1;
- if (ProfileSize < sizeof(__llvm_profile_header) +
+ if (ProfileSize < sizeof(__llvm_profile_header) + Header->BinaryIdsSize +
Header->DataSize * sizeof(__llvm_profile_data) +
Header->NamesSize + Header->CountersSize)
return 1;
@@ -91,7 +93,8 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
const char *SrcValueProfDataStart, *SrcValueProfData;
SrcDataStart =
- (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
+ (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) +
+ Header->BinaryIdsSize);
SrcDataEnd = SrcDataStart + Header->DataSize;
SrcCountersStart = (uint64_t *)SrcDataEnd;
SrcNameStart = (const char *)(SrcCountersStart + Header->CountersSize);
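With BinaryIdsSize now part of the header (and the raw format bumped to version 7), everything after the header shifts by that many bytes, which is why the merge code above adjusts its section pointers. A hedged sketch of the layout arithmetic, assuming the profile runtime's headers are in scope and ProfileData points at a raw profile buffer (padding fields omitted for brevity):

    /* Field names follow InstrProfData.inc; the surrounding code is
       illustrative only. Binary IDs sit between the header and the data. */
    const __llvm_profile_header *H = (const __llvm_profile_header *)ProfileData;
    const char *BinaryIds = ProfileData + sizeof(__llvm_profile_header);
    const __llvm_profile_data *Data =
        (const __llvm_profile_data *)(BinaryIds + H->BinaryIdsSize);
    const uint64_t *Counters = (const uint64_t *)(Data + H->DataSize);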
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 508624a80cd6..7c15f97aff89 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -17,6 +17,15 @@
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
+#if defined(__FreeBSD__) && !defined(ElfW)
+/*
+ * FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet.
+ * If this is added to all supported FreeBSD versions in the future, this
+ * compatibility macro can be removed.
+ */
+#define ElfW(type) __ElfN(type)
+#endif
+
#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
@@ -76,6 +85,7 @@ COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) {
COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START;
COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP;
+#ifdef NT_GNU_BUILD_ID
static size_t RoundUp(size_t size, size_t align) {
return (size + align - 1) & ~(align - 1);
}
@@ -179,5 +189,14 @@ COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
return 0;
}
+#else /* !NT_GNU_BUILD_ID */
+/*
+ * Fallback implementation for targets that don't support the GNU
+ * extensions NT_GNU_BUILD_ID and __ehdr_start.
+ */
+COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
+ return 0;
+}
+#endif
#endif
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 3cf23694f878..97e33f3157aa 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -354,6 +354,16 @@
# define _LIBCPP_NO_CFI
#endif
+// If the compiler supports using_if_exists, pretend we have those functions and they'll
+// be picked up if the C library provides them.
+//
+// TODO: Once we drop support for Clang 12, we can assume the compiler supports using_if_exists
+// for platforms that don't have a conforming C11 library, so we can drop this whole thing.
+#if __has_attribute(using_if_exists)
+# define _LIBCPP_HAS_TIMESPEC_GET
+# define _LIBCPP_HAS_QUICK_EXIT
+# define _LIBCPP_HAS_ALIGNED_ALLOC
+#else
#if (defined(__ISO_C_VISIBLE) && (__ISO_C_VISIBLE >= 2011)) || __cplusplus >= 201103L
# if defined(__FreeBSD__)
# define _LIBCPP_HAS_ALIGNED_ALLOC
@@ -408,6 +418,7 @@
# endif
# endif // __APPLE__
#endif
+#endif // __has_attribute(using_if_exists)
#ifndef _LIBCPP_CXX03_LANG
# define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp)
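The attribute this relies on makes a using-declaration vanish when its target is not declared, instead of erroring out; that is why libc++ can unconditionally claim these functions exist. An illustrative sketch (not the actual libc++ declarations), valid for compilers that support the Clang using_if_exists attribute:

    namespace sketch {
      // If the C library declares ::timespec_get, this imports it; if not,
      // the declaration is silently dropped rather than being a hard error.
      using ::timespec_get __attribute__((using_if_exists));
    }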
diff --git a/libcxx/include/ctime b/libcxx/include/ctime
index 8b2efd7449ca..2a3fdd12e874 100644
--- a/libcxx/include/ctime
+++ b/libcxx/include/ctime
@@ -59,7 +59,7 @@ int timespec_get( struct timespec *ts, int base); // C++17
// we're detecting this here instead of in <__config> because we can't include
// system headers from <__config>, since it leads to circular module dependencies.
// This is also meant to be a very temporary workaround until the SDKs are fixed.
-#if defined(__APPLE__)
+#if defined(__APPLE__) && !__has_attribute(using_if_exists)
# include <sys/cdefs.h>
# if defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL)
# define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED
diff --git a/libcxx/include/ios b/libcxx/include/ios
index 3128bca89999..c9230d6a9484 100644
--- a/libcxx/include/ios
+++ b/libcxx/include/ios
@@ -607,8 +607,15 @@ public:
static_assert((is_same<_CharT, typename traits_type::char_type>::value),
"traits_type::char_type must be the same type as CharT");
+#ifdef _LIBCPP_CXX03_LANG
+ // Preserve the ability to compare with literal 0,
+ // and implicitly convert to bool, but not implicitly convert to int.
+ _LIBCPP_INLINE_VISIBILITY
+ operator void*() const {return fail() ? nullptr : (void*)this;}
+#else
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const {return !fail();}
+#endif
_LIBCPP_INLINE_VISIBILITY bool operator!() const {return fail();}
_LIBCPP_INLINE_VISIBILITY iostate rdstate() const {return ios_base::rdstate();}
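The C++03 branch restores the classic "safe bool" idiom: operator void* allows boolean tests and comparisons against literal 0 while still blocking conversions to int. An illustration of what each mode accepts:

    #include <fstream>
    void probe(std::ifstream &f) {
      if (f) {}      // OK in both modes
      if (f != 0) {} // OK in C++03 via operator void*; ill-formed with the
                     // C++11 explicit operator bool
      // int n = f;  // ill-formed in both modes: no conversion to int
    }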
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a996a815599a..e1abb4dfab36 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -86,7 +86,8 @@ struct SymbolVersion {
struct VersionDefinition {
llvm::StringRef name;
uint16_t id;
- std::vector<SymbolVersion> patterns;
+ std::vector<SymbolVersion> nonLocalPatterns;
+ std::vector<SymbolVersion> localPatterns;
};
// This struct contains the global configuration for the linker.
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 91e7df21a60a..594c20016827 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1351,18 +1351,19 @@ static void readConfigs(opt::InputArgList &args) {
}
assert(config->versionDefinitions.empty());
- config->versionDefinitions.push_back({"local", (uint16_t)VER_NDX_LOCAL, {}});
config->versionDefinitions.push_back(
- {"global", (uint16_t)VER_NDX_GLOBAL, {}});
+ {"local", (uint16_t)VER_NDX_LOCAL, {}, {}});
+ config->versionDefinitions.push_back(
+ {"global", (uint16_t)VER_NDX_GLOBAL, {}, {}});
// If --retain-symbol-file is used, we'll keep only the symbols listed in
// the file and discard all others.
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
- config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(
+ config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
{"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
for (StringRef s : args::getLines(*buffer))
- config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(
+ config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
{s, /*isExternCpp=*/false, /*hasWildcard=*/false});
}
@@ -2069,23 +2070,37 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
if (suffix1[0] != '@' || suffix1[1] == '@')
continue;
- // Check whether the default version foo@@v1 exists. If it exists, the
- // symbol can be found by the name "foo" in the symbol table.
- Symbol *maybeDefault = symtab->find(name);
- if (!maybeDefault)
- continue;
- const char *suffix2 = maybeDefault->getVersionSuffix();
- if (suffix2[0] != '@' || suffix2[1] != '@' ||
- strcmp(suffix1 + 1, suffix2 + 2) != 0)
+ // Check the existing symbol foo. We have two special cases to handle:
+ //
+ // * There is a definition of foo@v1 and foo@@v1.
+ // * There is a definition of foo@v1 and foo.
+ Defined *sym2 = dyn_cast_or_null<Defined>(symtab->find(name));
+ if (!sym2)
continue;
-
- // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
- map.try_emplace(sym, maybeDefault);
- // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
- // definition error.
- maybeDefault->resolve(*sym);
- // Eliminate foo@v1 from the symbol table.
- sym->symbolKind = Symbol::PlaceholderKind;
+ const char *suffix2 = sym2->getVersionSuffix();
+ if (suffix2[0] == '@' && suffix2[1] == '@' &&
+ strcmp(suffix1 + 1, suffix2 + 2) == 0) {
+ // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
+ map.try_emplace(sym, sym2);
+ // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
+ // definition error.
+ sym2->resolve(*sym);
+ // Eliminate foo@v1 from the symbol table.
+ sym->symbolKind = Symbol::PlaceholderKind;
+ } else if (auto *sym1 = dyn_cast<Defined>(sym)) {
+ if (sym2->versionId > VER_NDX_GLOBAL
+ ? config->versionDefinitions[sym2->versionId].name == suffix1 + 1
+ : sym1->section == sym2->section && sym1->value == sym2->value) {
+ // Due to an assembler design flaw, if foo is defined, .symver foo,
+ // foo@v1 defines both foo and foo@v1. Unless foo is bound to a
+ // different version, GNU ld makes foo@v1 canonical and eliminates foo.
+ // Emulate its behavior; otherwise we would have foo or foo@@v1 beside
+ // foo@v1. The foo@v1 and foo merging does not apply if they are not
+ // defined in the same place.
+ map.try_emplace(sym2, sym);
+ sym2->symbolKind = Symbol::PlaceholderKind;
+ }
+ }
}
if (map.empty())
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index a938984ad945..01785f39ed75 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -849,17 +849,8 @@ void LinkerScript::diagnoseOrphanHandling() const {
}
uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) {
- bool isTbss =
- (ctx->outSec->flags & SHF_TLS) && ctx->outSec->type == SHT_NOBITS;
- uint64_t start = isTbss ? dot + ctx->threadBssOffset : dot;
- start = alignTo(start, alignment);
- uint64_t end = start + size;
-
- if (isTbss)
- ctx->threadBssOffset = end - dot;
- else
- dot = end;
- return end;
+ dot = alignTo(dot, alignment) + size;
+ return dot;
}
void LinkerScript::output(InputSection *s) {
@@ -931,13 +922,24 @@ static OutputSection *findFirstSection(PhdrEntry *load) {
// This function assigns offsets to input sections and an output section
// for a single sections command (e.g. ".text { *(.text); }").
void LinkerScript::assignOffsets(OutputSection *sec) {
+ const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS;
const bool sameMemRegion = ctx->memRegion == sec->memRegion;
const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr;
const uint64_t savedDot = dot;
ctx->memRegion = sec->memRegion;
ctx->lmaRegion = sec->lmaRegion;
- if (sec->flags & SHF_ALLOC) {
+ if (!(sec->flags & SHF_ALLOC)) {
+ // Non-SHF_ALLOC sections have zero addresses.
+ dot = 0;
+ } else if (isTbss) {
+ // Allow consecutive SHF_TLS SHT_NOBITS output sections. The address range
+ // starts from the end address of the previous tbss section.
+ if (ctx->tbssAddr == 0)
+ ctx->tbssAddr = dot;
+ else
+ dot = ctx->tbssAddr;
+ } else {
if (ctx->memRegion)
dot = ctx->memRegion->curPos;
if (sec->addrExpr)
@@ -950,9 +952,6 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
if (ctx->memRegion && ctx->memRegion->curPos < dot)
expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos,
ctx->memRegion->name, sec->name);
- } else {
- // Non-SHF_ALLOC sections have zero addresses.
- dot = 0;
}
switchTo(sec);
@@ -1008,8 +1007,13 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
// Non-SHF_ALLOC sections do not affect the addresses of other OutputSections
// as they are not part of the process image.
- if (!(sec->flags & SHF_ALLOC))
+ if (!(sec->flags & SHF_ALLOC)) {
dot = savedDot;
+ } else if (isTbss) {
+ // NOBITS TLS sections are similar. Additionally save the end address.
+ ctx->tbssAddr = dot;
+ dot = savedDot;
+ }
}
static bool isDiscardable(OutputSection &sec) {
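The new tbss path lets SHF_TLS/SHT_NOBITS sections consume addresses from a separate cursor: dot is borrowed, advanced, saved into tbssAddr, then restored, so these sections overlap whatever follows in the image. A condensed C++ sketch of that bookkeeping (simplified from assignOffsets above; globals stand in for AddressState):

    #include <cstdint>
    uint64_t dot = 0, tbssAddr = 0; // illustrative stand-ins for AddressState
    void assignTbssOffsets(uint64_t size, uint64_t align) {
      uint64_t savedDot = dot;
      if (tbssAddr != 0)
        dot = tbssAddr;                       // resume after the previous .tbss
      dot = (dot + align - 1) & ~(align - 1); // alignTo(dot, align)
      dot += size;
      tbssAddr = dot;                         // remember the end for the next one
      dot = savedDot;                         // .tbss takes no space in the image
    }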
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 0592c52acb84..d2487ae0f9d2 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -247,11 +247,11 @@ class LinkerScript final {
// not be used outside of the scope of a call to the above functions.
struct AddressState {
AddressState();
- uint64_t threadBssOffset = 0;
OutputSection *outSec = nullptr;
MemoryRegion *memRegion = nullptr;
MemoryRegion *lmaRegion = nullptr;
uint64_t lmaOffset = 0;
+ uint64_t tbssAddr = 0;
};
llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index e3cc210972b2..537859f9e0b5 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -527,6 +527,13 @@ static SmallSet<SharedSymbol *, 4> getSymbolsAt(SharedSymbol &ss) {
if (auto *alias = dyn_cast_or_null<SharedSymbol>(sym))
ret.insert(alias);
}
+
+ // The loop does not check SHT_GNU_verneed, so ret does not contain
+ // non-default version symbols. If ss has a non-default version, ret won't
+ // contain ss. Just add ss unconditionally. If a non-default version alias is
+ // separately copy relocated, it and ss will have different addresses.
+ // Fortunately this case is impractical and fails with GNU ld as well.
+ ret.insert(&ss);
return ret;
}
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 2c980eb810c7..1c743fd47747 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -1496,9 +1496,9 @@ void ScriptParser::readAnonymousDeclaration() {
std::vector<SymbolVersion> globals;
std::tie(locals, globals) = readSymbols();
for (const SymbolVersion &pat : locals)
- config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);
+ config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat);
for (const SymbolVersion &pat : globals)
- config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(pat);
+ config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat);
expect(";");
}
@@ -1510,13 +1510,12 @@ void ScriptParser::readVersionDeclaration(StringRef verStr) {
std::vector<SymbolVersion> locals;
std::vector<SymbolVersion> globals;
std::tie(locals, globals) = readSymbols();
- for (const SymbolVersion &pat : locals)
- config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);
// Create a new version definition and add that to the global symbols.
VersionDefinition ver;
ver.name = verStr;
- ver.patterns = globals;
+ ver.nonLocalPatterns = std::move(globals);
+ ver.localPatterns = std::move(locals);
ver.id = config->versionDefinitions.size();
config->versionDefinitions.push_back(ver);
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 70aea288c53f..22e6b4f92898 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -134,9 +134,20 @@ static bool canBeVersioned(const Symbol &sym) {
StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() {
if (!demangledSyms) {
demangledSyms.emplace();
+ std::string demangled;
for (Symbol *sym : symVector)
- if (canBeVersioned(*sym))
- (*demangledSyms)[demangleItanium(sym->getName())].push_back(sym);
+ if (canBeVersioned(*sym)) {
+ StringRef name = sym->getName();
+ size_t pos = name.find('@');
+ if (pos == std::string::npos)
+ demangled = demangleItanium(name);
+ else if (pos + 1 == name.size() || name[pos + 1] == '@')
+ demangled = demangleItanium(name.substr(0, pos));
+ else
+ demangled =
+ (demangleItanium(name.substr(0, pos)) + name.substr(pos)).str();
+ (*demangledSyms)[demangled].push_back(sym);
+ }
}
return *demangledSyms;
}
@@ -150,19 +161,29 @@ std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) {
return {};
}
-std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) {
+std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver,
+ bool includeNonDefault) {
std::vector<Symbol *> res;
SingleStringMatcher m(ver.name);
+ auto check = [&](StringRef name) {
+ size_t pos = name.find('@');
+ if (!includeNonDefault)
+ return pos == StringRef::npos;
+ return !(pos + 1 < name.size() && name[pos + 1] == '@');
+ };
if (ver.isExternCpp) {
for (auto &p : getDemangledSyms())
if (m.match(p.first()))
- res.insert(res.end(), p.second.begin(), p.second.end());
+ for (Symbol *sym : p.second)
+ if (check(sym->getName()))
+ res.push_back(sym);
return res;
}
for (Symbol *sym : symVector)
- if (canBeVersioned(*sym) && m.match(sym->getName()))
+ if (canBeVersioned(*sym) && check(sym->getName()) &&
+ m.match(sym->getName()))
res.push_back(sym);
return res;
}
@@ -172,7 +193,7 @@ void SymbolTable::handleDynamicList() {
for (SymbolVersion &ver : config->dynamicList) {
std::vector<Symbol *> syms;
if (ver.hasWildcard)
- syms = findAllByVersion(ver);
+ syms = findAllByVersion(ver, /*includeNonDefault=*/true);
else
syms = findByVersion(ver);
@@ -181,21 +202,13 @@ void SymbolTable::handleDynamicList() {
}
}
-// Set symbol versions to symbols. This function handles patterns
-// containing no wildcard characters.
-void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
- StringRef versionName) {
- if (ver.hasWildcard)
- return;
-
+// Set symbol versions to symbols. This function handles patterns containing no
+// wildcard characters. Return false if no symbol definition matches ver.
+bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
+ StringRef versionName,
+ bool includeNonDefault) {
// Get a list of symbols which we need to assign the version to.
std::vector<Symbol *> syms = findByVersion(ver);
- if (syms.empty()) {
- if (!config->undefinedVersion)
- error("version script assignment of '" + versionName + "' to symbol '" +
- ver.name + "' failed: symbol not defined");
- return;
- }
auto getName = [](uint16_t ver) -> std::string {
if (ver == VER_NDX_LOCAL)
@@ -207,10 +220,11 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
// Assign the version.
for (Symbol *sym : syms) {
- // Skip symbols containing version info because symbol versions
- // specified by symbol names take precedence over version scripts.
- // See parseSymbolVersion().
- if (sym->getName().contains('@'))
+ // For a non-local versionId, skip symbols containing version info because
+ // symbol versions specified by symbol names take precedence over version
+ // scripts. See parseSymbolVersion().
+ if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
+ sym->getName().contains('@'))
continue;
// If the version has not been assigned, verdefIndex is -1. Use an arbitrary
@@ -225,13 +239,15 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
warn("attempt to reassign symbol '" + ver.name + "' of " +
getName(sym->versionId) + " to " + getName(versionId));
}
+ return !syms.empty();
}
-void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
+void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
+ bool includeNonDefault) {
// Exact matching takes precedence over fuzzy matching,
// so we set a version to a symbol only if no version has been assigned
// to the symbol. This behavior is compatible with GNU.
- for (Symbol *sym : findAllByVersion(ver))
+ for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
if (sym->verdefIndex == UINT32_C(-1)) {
sym->verdefIndex = 0;
sym->versionId = versionId;
@@ -244,26 +260,60 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
// script file, the script does not actually define any symbol version,
// but just specifies symbols visibilities.
void SymbolTable::scanVersionScript() {
+ SmallString<128> buf;
// First, we assign versions to exact matching symbols,
// i.e. version definitions not containing any glob meta-characters.
- for (VersionDefinition &v : config->versionDefinitions)
- for (SymbolVersion &pat : v.patterns)
- assignExactVersion(pat, v.id, v.name);
+ std::vector<Symbol *> syms;
+ for (VersionDefinition &v : config->versionDefinitions) {
+ auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
+ bool found =
+ assignExactVersion(pat, id, ver, /*includeNonDefault=*/false);
+ buf.clear();
+ found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
+ pat.isExternCpp, /*hasWildCard=*/false},
+ id, ver, /*includeNonDefault=*/true);
+ if (!found && !config->undefinedVersion)
+ errorOrWarn("version script assignment of '" + ver + "' to symbol '" +
+ pat.name + "' failed: symbol not defined");
+ };
+ for (SymbolVersion &pat : v.nonLocalPatterns)
+ if (!pat.hasWildcard)
+ assignExact(pat, v.id, v.name);
+ for (SymbolVersion pat : v.localPatterns)
+ if (!pat.hasWildcard)
+ assignExact(pat, VER_NDX_LOCAL, "local");
+ }
// Next, assign versions to wildcards that are not "*". Note that because the
// last match takes precedence over previous matches, we iterate over the
// definitions in the reverse order.
- for (VersionDefinition &v : llvm::reverse(config->versionDefinitions))
- for (SymbolVersion &pat : v.patterns)
+ auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
+ assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
+ buf.clear();
+ assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
+ pat.isExternCpp, /*hasWildCard=*/true},
+ id,
+ /*includeNonDefault=*/true);
+ };
+ for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) {
+ for (SymbolVersion &pat : v.nonLocalPatterns)
if (pat.hasWildcard && pat.name != "*")
- assignWildcardVersion(pat, v.id);
+ assignWildcard(pat, v.id, v.name);
+ for (SymbolVersion &pat : v.localPatterns)
+ if (pat.hasWildcard && pat.name != "*")
+ assignWildcard(pat, VER_NDX_LOCAL, v.name);
+ }
// Then, assign versions to "*". In GNU linkers they have lower priority than
// other wildcards.
- for (VersionDefinition &v : config->versionDefinitions)
- for (SymbolVersion &pat : v.patterns)
+ for (VersionDefinition &v : config->versionDefinitions) {
+ for (SymbolVersion &pat : v.nonLocalPatterns)
if (pat.hasWildcard && pat.name == "*")
- assignWildcardVersion(pat, v.id);
+ assignWildcard(pat, v.id, v.name);
+ for (SymbolVersion &pat : v.localPatterns)
+ if (pat.hasWildcard && pat.name == "*")
+ assignWildcard(pat, VER_NDX_LOCAL, v.name);
+ }
  // Symbols themselves might know their versions because symbols
// can contain versions in the form of <name>@<version>.
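
As a sketch of the precedence implemented above (hypothetical version script; the symbol names are illustrative), exact patterns win over wildcards, later wildcard definitions beat earlier ones, and a bare "*" is considered last:

    v1 {
      global:
        foo;     # exact pattern: wins over any wildcard match
      local:
        bar*;    # wildcard local: matching symbols get VER_NDX_LOCAL
    };
    v2 {
      global:
        f*;      # wildcard: applied only where no exact pattern matched
        *;       # catch-all: lowest priority among wildcards
    };

With the change above, each pattern is additionally tried against non-default versioned names, so the bar* pattern in v1 also localizes a symbol named bar1@v1.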
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index 507af8d2be75..54c4b1169ed1 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -65,12 +65,14 @@ public:
private:
std::vector<Symbol *> findByVersion(SymbolVersion ver);
- std::vector<Symbol *> findAllByVersion(SymbolVersion ver);
+ std::vector<Symbol *> findAllByVersion(SymbolVersion ver,
+ bool includeNonDefault);
llvm::StringMap<std::vector<Symbol *>> &getDemangledSyms();
- void assignExactVersion(SymbolVersion ver, uint16_t versionId,
- StringRef versionName);
- void assignWildcardVersion(SymbolVersion ver, uint16_t versionId);
+ bool assignExactVersion(SymbolVersion ver, uint16_t versionId,
+ StringRef versionName, bool includeNonDefault);
+ void assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
+ bool includeNonDefault);
// The order the global symbols are in is not defined. We can use an arbitrary
// order, but it has to be reproducible. That is true even when cross linking.
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 496be33dd182..cef303f05f89 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -208,6 +208,9 @@ OutputSection *Symbol::getOutputSection() const {
// If a symbol name contains '@', the characters after that is
// a symbol version name. This function parses that.
void Symbol::parseSymbolVersion() {
+ // Return if localized by a local: pattern in a version script.
+ if (versionId == VER_NDX_LOCAL)
+ return;
StringRef s = getName();
size_t pos = s.find('@');
if (pos == 0 || pos == StringRef::npos)
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index a52ee4348f78..50af6e7d7939 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -24,6 +24,13 @@ Non-comprehensive list of changes in this release
ELF Improvements
----------------
+* ``-z start-stop-gc`` is now supported and becomes the default.
+ (`D96914 <https://reviews.llvm.org/D96914>`_)
+ (`rG6d2d3bd0 <https://reviews.llvm.org/rG6d2d3bd0a61f5fc7fd9f61f48bc30e9ca77cc619>`_)
+* ``--shuffle-sections=<seed>`` has been changed to ``--shuffle-sections=<section-glob>=<seed>``.
+ If seed is -1, the matched input sections are reversed.
+ (`D98445 <https://reviews.llvm.org/D98445>`_)
+ (`D98679 <https://reviews.llvm.org/D98679>`_)
* ``-Bsymbolic -Bsymbolic-functions`` has been changed to behave the same as ``-Bsymbolic-functions``. This matches GNU ld.
(`D102461 <https://reviews.llvm.org/D102461>`_)
* ``-Bno-symbolic`` has been added.
@@ -32,6 +39,75 @@ ELF Improvements
(`D103303 <https://reviews.llvm.org/D103303>`_)
* ``-Bsymbolic-non-weak-functions`` has been added as a ``STB_GLOBAL`` subset of ``-Bsymbolic-functions``.
(`D102570 <https://reviews.llvm.org/D102570>`_)
+* ``--no-allow-shlib-undefined`` has been improved to catch more cases.
+ (`D101996 <https://reviews.llvm.org/D101996>`_)
+* ``__rela_iplt_start`` is no longer defined for ``-pie``/``-shared``.
+ This makes GCC/Clang ``-static-pie`` built executables work.
+  (`rGf8cb78e99 <https://reviews.llvm.org/rGf8cb78e99aae9aa3f89f7bfe667db2c5b767f21f>`_)
+* IRELATIVE/TLSDESC relocations now support ``-z rel``.
+ (`D100544 <https://reviews.llvm.org/D100544>`_)
+* Section groups with a zero flag are now supported.
+ This is used by ``comdat nodeduplicate`` in LLVM IR.
+ (`D96636 <https://reviews.llvm.org/D96636>`_)
+ (`D106228 <https://reviews.llvm.org/D106228>`_)
+* Defined symbols are now resolved before undefined symbols to stabilize the behavior of archive member extraction.
+ (`D95985 <https://reviews.llvm.org/D95985>`_)
+* ``STB_WEAK`` symbols are now preferred over COMMON symbols as a fix to a ``--fortran-common`` regression.
+ (`D105945 <https://reviews.llvm.org/D105945>`_)
+* Absolute relocations referencing undefined weak symbols now produce dynamic relocations for ``-pie``, matching GOT-generating relocations.
+ (`D105164 <https://reviews.llvm.org/D105164>`_)
+* Exported symbols are now communicated to the LTO library so as to make LTO
+ based whole program devirtualization (``-flto=thin -fwhole-program-vtables``)
+ work with shared objects.
+ (`D91583 <https://reviews.llvm.org/D91583>`_)
+* Whole program devirtualization now respects ``local:`` version nodes in a version script.
+ (`D98220 <https://reviews.llvm.org/D98220>`_)
+ (`D98686 <https://reviews.llvm.org/D98686>`_)
+* ``local:`` version nodes in a version script now apply to non-default version symbols.
+ (`D107234 <https://reviews.llvm.org/D107234>`_)
+* If an object file defines both ``foo`` and ``foo@v1``, now only ``foo@v1`` will be in the output.
+ (`D107235 <https://reviews.llvm.org/D107235>`_)
+* Copy relocations on non-default version symbols are now supported.
+ (`D107535 <https://reviews.llvm.org/D107535>`_)
+
+Linker script changes:
+
+* ``.``, ``$``, and double quotes can now be used in symbol names in expressions.
+ (`D98306 <https://reviews.llvm.org/D98306>`_)
+ (`rGe7a7ad13 <https://reviews.llvm.org/rGe7a7ad134fe182aad190cb3ebc441164470e92f5>`_)
+* Fixed the value of ``.`` in the output section description of ``.tbss``.
+ (`D107288 <https://reviews.llvm.org/D107288>`_)
+* ``NOLOAD`` sections can now be placed in a ``PT_LOAD`` program header.
+ (`D103815 <https://reviews.llvm.org/D103815>`_)
+* ``OUTPUT_FORMAT(default, big, little)`` now consults ``-EL`` and ``-EB``.
+ (`D96214 <https://reviews.llvm.org/D96214>`_)
+* The ``OVERWRITE_SECTIONS`` command has been added.
+ (`D103303 <https://reviews.llvm.org/D103303>`_)
+* The section order within an ``INSERT AFTER`` command is now preserved.
+ (`D105158 <https://reviews.llvm.org/D105158>`_)
+
+Architecture-specific changes:
+
+* aarch64_be is now supported.
+ (`D96188 <https://reviews.llvm.org/D96188>`_)
+* The AMDGPU port now supports ``--amdhsa-code-object-version=4`` object files.
+ (`D95811 <https://reviews.llvm.org/D95811>`_)
+* The ARM port now accounts for PC biases in range extension thunk creation.
+ (`D97550 <https://reviews.llvm.org/D97550>`_)
+* The AVR port now computes ``e_flags``.
+ (`D99754 <https://reviews.llvm.org/D99754>`_)
+* The Mips port now omits unneeded dynamic relocations for PIE non-preemptible TLS.
+ (`D101382 <https://reviews.llvm.org/D101382>`_)
+* The PowerPC port now supports ``--power10-stubs=no`` to omit Power10 instructions from call stubs.
+ (`D94625 <https://reviews.llvm.org/D94625>`_)
+* Fixed a thunk creation bug in the PowerPC port when TOC/NOTOC calls are mixed.
+ (`D101837 <https://reviews.llvm.org/D101837>`_)
+* The RISC-V port now resolves undefined weak relocations to the current location if not using PLT.
+ (`D103001 <https://reviews.llvm.org/D103001>`_)
+* ``R_386_GOTOFF`` relocations from .debug_info are now allowed, for compatibility with GCC.
+ (`D95994 <https://reviews.llvm.org/D95994>`_)
+* ``gotEntrySize`` has been added to improve support for the ILP32 ABI of x86-64.
+ (`D102569 <https://reviews.llvm.org/D102569>`_)
Breaking changes
----------------
@@ -42,17 +118,75 @@ Breaking changes
COFF Improvements
-----------------
-* ...
-
-MinGW Improvements
-------------------
+* Avoid thread exhaustion when running on 32-bit Windows.
+ (`D105506 <https://reviews.llvm.org/D105506>`_)
-* ...
+* Improve process termination on Windows while a thread pool might be
+  running. (`D102944 <https://reviews.llvm.org/D102944>`_)
-MachO Improvements
+MinGW Improvements
------------------
-* Item 1.
+* Support for linking directly against a DLL without using an import library
+ has been added. (`D104530 <https://reviews.llvm.org/D104530>`_ and
+ `D104531 <https://reviews.llvm.org/D104531>`_)
+
+* Fix linking with ``--export-all-symbols`` in combination with
+ ``-function-sections``. (`D101522 <https://reviews.llvm.org/D101522>`_ and
+ `D101615 <https://reviews.llvm.org/D101615>`_)
+
+* Fix automatic export of symbols from LTO objects.
+ (`D101569 <https://reviews.llvm.org/D101569>`_)
+
+* Accept more spellings of some options.
+ (`D107237 <https://reviews.llvm.org/D107237>`_ and
+ `D107253 <https://reviews.llvm.org/D107253>`_)
+
+Mach-O Improvements
+-------------------
+
+The Mach-O backend is now able to link several large, real-world programs,
+though we are still working out the kinks.
+
+* arm64 is now supported as a target. (`D88629 <https://reviews.llvm.org/D88629>`_)
+* arm64_32 is now supported as a target. (`D99822 <https://reviews.llvm.org/D99822>`_)
+* Branch-range-extension thunks are now supported. (`D100818 <https://reviews.llvm.org/D100818>`_)
+* ``-dead_strip`` is now supported. (`D103324 <https://reviews.llvm.org/D103324>`_)
+* Support for identical code folding (``--icf=all``) has been added.
+ (`D103292 <https://reviews.llvm.org/D103292>`_)
+* Support for special ``$start`` and ``$end`` symbols for segments & sections has been
+ added. (`D106767 <https://reviews.llvm.org/D106767>`_, `D106629 <https://reviews.llvm.org/D106629>`_)
+* ``$ld$previous`` symbols are now supported. (`D103505 <https://reviews.llvm.org/D103505>`_)
+* ``$ld$install_name`` symbols are now supported. (`D103746 <https://reviews.llvm.org/D103746>`_)
+* ``__mh_*_header`` symbols are now supported. (`D97007 <https://reviews.llvm.org/D97007>`_)
+* LC_CODE_SIGNATURE is now supported. (`D96164 <https://reviews.llvm.org/D96164>`_)
+* LC_FUNCTION_STARTS is now supported. (`D97260 <https://reviews.llvm.org/D97260>`_)
+* LC_DATA_IN_CODE is now supported. (`D103006 <https://reviews.llvm.org/D103006>`_)
+* Bind opcodes are more compactly encoded. (`D106128 <https://reviews.llvm.org/D106128>`_,
+ `D105075 <https://reviews.llvm.org/D105075>`_)
+* LTO cache support has been added. (`D105922 <https://reviews.llvm.org/D105922>`_)
+* ``-application_extension`` is now supported. (`D105818 <https://reviews.llvm.org/D105818>`_)
+* ``-export_dynamic`` is now partially supported. (`D105482 <https://reviews.llvm.org/D105482>`_)
+* ``-arch_multiple`` is now supported. (`D105450 <https://reviews.llvm.org/D105450>`_)
+* ``-final_output`` is now supported. (`D105449 <https://reviews.llvm.org/D105449>`_)
+* ``-umbrella`` is now supported. (`D105448 <https://reviews.llvm.org/D105448>`_)
+* ``--print-dylib-search`` is now supported. (`D103985 <https://reviews.llvm.org/D103985>`_)
+* ``-force_load_swift_libs`` is now supported. (`D103709 <https://reviews.llvm.org/D103709>`_)
+* ``-reexport_framework``, ``-reexport_library``, ``-reexport-l`` are now supported.
+ (`D103497 <https://reviews.llvm.org/D103497>`_)
+* ``.weak_def_can_be_hidden`` is now supported. (`D101080 <https://reviews.llvm.org/D101080>`_)
+* ``-add_ast_path`` is now supported. (`D100076 <https://reviews.llvm.org/D100076>`_)
+* ``-segprot`` is now supported. (`D99389 <https://reviews.llvm.org/D99389>`_)
+* ``-dependency_info`` is now partially supported. (`D98559 <https://reviews.llvm.org/D98559>`_)
+* ``--time-trace`` is now supported. (`D98419 <https://reviews.llvm.org/D98419>`_)
+* ``-mark_dead_strippable_dylib`` is now supported. (`D98262 <https://reviews.llvm.org/D98262>`_)
+* ``-[un]exported_symbol[s_list]`` is now supported. (`D98223 <https://reviews.llvm.org/D98223>`_)
+* ``-flat_namespace`` is now supported. (`D97641 <https://reviews.llvm.org/D97641>`_)
+* ``-rename_section`` and ``-rename_segment`` are now supported. (`D97600 <https://reviews.llvm.org/D97600>`_)
+* ``-bundle_loader`` is now supported. (`D95913 <https://reviews.llvm.org/D95913>`_)
+* ``-map`` is now partially supported. (`D98323 <https://reviews.llvm.org/D98323>`_)
+
+There were numerous other bug fixes as well.
WebAssembly Improvements
------------------------
diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp b/lldb/source/Commands/CommandObjectMemoryTag.cpp
index 1dfb32a92f3b..840f81719d7d 100644
--- a/lldb/source/Commands/CommandObjectMemoryTag.cpp
+++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp
@@ -7,8 +7,11 @@
//===----------------------------------------------------------------------===//
#include "CommandObjectMemoryTag.h"
+#include "lldb/Host/OptionParser.h"
#include "lldb/Interpreter/CommandReturnObject.h"
#include "lldb/Interpreter/OptionArgParser.h"
+#include "lldb/Interpreter/OptionGroupFormat.h"
+#include "lldb/Interpreter/OptionValueString.h"
#include "lldb/Target/Process.h"
using namespace lldb;
@@ -21,7 +24,8 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
public:
CommandObjectMemoryTagRead(CommandInterpreter &interpreter)
: CommandObjectParsed(interpreter, "tag",
- "Read memory tags for the given range of memory.",
+ "Read memory tags for the given range of memory."
+ " Mismatched tags will be marked.",
nullptr,
eCommandRequiresTarget | eCommandRequiresProcess |
eCommandProcessMustBePaused) {
@@ -97,16 +101,17 @@ protected:
return false;
}
- result.AppendMessageWithFormatv("Logical tag: {0:x}",
- tag_manager->GetLogicalTag(start_addr));
+ lldb::addr_t logical_tag = tag_manager->GetLogicalTag(start_addr);
+ result.AppendMessageWithFormatv("Logical tag: {0:x}", logical_tag);
result.AppendMessage("Allocation tags:");
addr_t addr = tagged_range->GetRangeBase();
for (auto tag : *tags) {
addr_t next_addr = addr + tag_manager->GetGranuleSize();
      // Showing tagged addresses here until we have non-address bit handling
- result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}", addr, next_addr,
- tag);
+ result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}{3}", addr,
+ next_addr, tag,
+ logical_tag == tag ? "" : " (mismatch)");
addr = next_addr;
}
@@ -115,6 +120,168 @@ protected:
}
};
+#define LLDB_OPTIONS_memory_tag_write
+#include "CommandOptions.inc"
+
+class CommandObjectMemoryTagWrite : public CommandObjectParsed {
+public:
+ class OptionGroupTagWrite : public OptionGroup {
+ public:
+ OptionGroupTagWrite() : OptionGroup(), m_end_addr(LLDB_INVALID_ADDRESS) {}
+
+ ~OptionGroupTagWrite() override = default;
+
+ llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
+ return llvm::makeArrayRef(g_memory_tag_write_options);
+ }
+
+ Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_value,
+ ExecutionContext *execution_context) override {
+ Status status;
+ const int short_option =
+ g_memory_tag_write_options[option_idx].short_option;
+
+ switch (short_option) {
+ case 'e':
+ m_end_addr = OptionArgParser::ToAddress(execution_context, option_value,
+ LLDB_INVALID_ADDRESS, &status);
+ break;
+ default:
+ llvm_unreachable("Unimplemented option");
+ }
+
+ return status;
+ }
+
+ void OptionParsingStarting(ExecutionContext *execution_context) override {
+ m_end_addr = LLDB_INVALID_ADDRESS;
+ }
+
+ lldb::addr_t m_end_addr;
+ };
+
+ CommandObjectMemoryTagWrite(CommandInterpreter &interpreter)
+ : CommandObjectParsed(interpreter, "tag",
+ "Write memory tags starting from the granule that "
+ "contains the given address.",
+ nullptr,
+ eCommandRequiresTarget | eCommandRequiresProcess |
+ eCommandProcessMustBePaused),
+ m_option_group(), m_tag_write_options() {
+ // Address
+ m_arguments.push_back(
+ CommandArgumentEntry{CommandArgumentData(eArgTypeAddressOrExpression)});
+ // One or more tag values
+ m_arguments.push_back(CommandArgumentEntry{
+ CommandArgumentData(eArgTypeValue, eArgRepeatPlus)});
+
+ m_option_group.Append(&m_tag_write_options);
+ m_option_group.Finalize();
+ }
+
+ ~CommandObjectMemoryTagWrite() override = default;
+
+ Options *GetOptions() override { return &m_option_group; }
+
+protected:
+ bool DoExecute(Args &command, CommandReturnObject &result) override {
+ if (command.GetArgumentCount() < 2) {
+ result.AppendError("wrong number of arguments; expected "
+ "<address-expression> <tag> [<tag> [...]]");
+ return false;
+ }
+
+ Status error;
+ addr_t start_addr = OptionArgParser::ToAddress(
+ &m_exe_ctx, command[0].ref(), LLDB_INVALID_ADDRESS, &error);
+ if (start_addr == LLDB_INVALID_ADDRESS) {
+ result.AppendErrorWithFormatv("Invalid address expression, {0}",
+ error.AsCString());
+ return false;
+ }
+
+ command.Shift(); // shift off start address
+
+ std::vector<lldb::addr_t> tags;
+ for (auto &entry : command) {
+ lldb::addr_t tag_value;
+ // getAsInteger returns true on failure
+ if (entry.ref().getAsInteger(0, tag_value)) {
+        result.AppendErrorWithFormat(
+            "'%s' is not a valid unsigned integer value.\n",
+ entry.c_str());
+ return false;
+ }
+ tags.push_back(tag_value);
+ }
+
+ Process *process = m_exe_ctx.GetProcessPtr();
+ llvm::Expected<const MemoryTagManager *> tag_manager_or_err =
+ process->GetMemoryTagManager();
+
+ if (!tag_manager_or_err) {
+ result.SetError(Status(tag_manager_or_err.takeError()));
+ return false;
+ }
+
+ const MemoryTagManager *tag_manager = *tag_manager_or_err;
+
+ MemoryRegionInfos memory_regions;
+ // If this fails the list of regions is cleared, so we don't need to read
+ // the return status here.
+ process->GetMemoryRegions(memory_regions);
+
+    // We have to assume start_addr is not granule aligned. So if we simply
+    // made a range:
+    //   (start_addr, start_addr + (N * granule_size))
+    // we would end up with a range that isn't N granules but N+1 granules.
+    // To avoid this we'll align the start first using the method that
+    // doesn't check memory attributes. (If the final range is untagged
+    // we'll handle that error later.)
+ lldb::addr_t aligned_start_addr =
+ tag_manager->ExpandToGranule(MemoryTagManager::TagRange(start_addr, 1))
+ .GetRangeBase();
+
+ lldb::addr_t end_addr = 0;
+ // When you have an end address you want to align the range like tag read
+ // does. Meaning, align the start down (which we've done) and align the end
+ // up.
+ if (m_tag_write_options.m_end_addr != LLDB_INVALID_ADDRESS)
+ end_addr = m_tag_write_options.m_end_addr;
+ else
+ // Without an end address assume number of tags matches number of granules
+ // to write to
+ end_addr =
+ aligned_start_addr + (tags.size() * tag_manager->GetGranuleSize());
+
+ // Now we've aligned the start address so if we ask for another range
+ // using the number of tags N, we'll get back a range that is also N
+ // granules in size.
+ llvm::Expected<MemoryTagManager::TagRange> tagged_range =
+ tag_manager->MakeTaggedRange(aligned_start_addr, end_addr,
+ memory_regions);
+
+ if (!tagged_range) {
+ result.SetError(Status(tagged_range.takeError()));
+ return false;
+ }
+
+ Status status = process->WriteMemoryTags(tagged_range->GetRangeBase(),
+ tagged_range->GetByteSize(), tags);
+
+ if (status.Fail()) {
+ result.SetError(status);
+ return false;
+ }
+
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ return true;
+ }
+
+ OptionGroupOptions m_option_group;
+ OptionGroupTagWrite m_tag_write_options;
+};
+
CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter)
: CommandObjectMultiword(
interpreter, "tag", "Commands for manipulating memory tags",
@@ -123,6 +290,11 @@ CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter)
new CommandObjectMemoryTagRead(interpreter));
read_command_object->SetCommandName("memory tag read");
LoadSubCommand("read", read_command_object);
+
+ CommandObjectSP write_command_object(
+ new CommandObjectMemoryTagWrite(interpreter));
+ write_command_object->SetCommandName("memory tag write");
+ LoadSubCommand("write", write_command_object);
}
CommandObjectMemoryTag::~CommandObjectMemoryTag() = default;
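
A hypothetical session with the new subcommand (addresses and tag values are made up; on AArch64 MTE a granule is 16 bytes):

    (lldb) memory tag write 0xffff00001000 1 2
    # tag 1 -> the granule containing 0xffff00001000, tag 2 -> the next granule
    (lldb) memory tag write 0xffff00001000 1 --end-addr 0xffff00001040
    # tag 1 repeated over every granule up to, but not including, end-addr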
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 9c9b7c6e9b82..6abb4788bed0 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -504,6 +504,14 @@ let Command = "memory write" in {
Desc<"Start writing bytes from an offset within the input file.">;
}
+let Command = "memory tag write" in {
+ def memory_write_end_addr : Option<"end-addr", "e">, Group<1>,
+    Arg<"AddressOrExpression">, Desc<
+    "Set tags from the start address up to end-addr, repeating the given"
+    " tags as needed to cover the range (instead of deriving the range"
+    " from the number of tags given)">;
+}
+
let Command = "register read" in {
def register_read_alternate : Option<"alternate", "A">,
Desc<"Display register names using the alternate register name if there "
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
index 5e69b5793f9f..8e1f6bc29a6f 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
@@ -3474,15 +3474,31 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemTags(
if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':')
return SendIllFormedResponse(packet, invalid_type_err);
- int32_t type =
- packet.GetS32(std::numeric_limits<int32_t>::max(), /*base=*/16);
- if (type == std::numeric_limits<int32_t>::max() ||
+ // Type is a signed integer but packed into the packet as its raw bytes.
+ // However, our GetU64 uses strtoull which allows +/-. We do not want this.
+ const char *first_type_char = packet.Peek();
+ if (first_type_char && (*first_type_char == '+' || *first_type_char == '-'))
+ return SendIllFormedResponse(packet, invalid_type_err);
+
+ // Extract type as unsigned then cast to signed.
+ // Using a uint64_t here so that we have some value outside of the 32 bit
+ // range to use as the invalid return value.
+ uint64_t raw_type =
+ packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16);
+
+ if ( // Make sure the cast below would be valid
+ raw_type > std::numeric_limits<uint32_t>::max() ||
// To catch inputs like "123aardvark" that will parse but clearly aren't
// valid in this case.
packet.GetBytesLeft()) {
return SendIllFormedResponse(packet, invalid_type_err);
}
+ // First narrow to 32 bits otherwise the copy into type would take
+ // the wrong 4 bytes on big endian.
+ uint32_t raw_type_32 = raw_type;
+ int32_t type = reinterpret_cast<int32_t &>(raw_type_32);
+
StreamGDBRemote response;
std::vector<uint8_t> tags;
Status error = m_current_process->ReadMemoryTags(type, addr, length, tags);
@@ -3552,7 +3568,11 @@ GDBRemoteCommunicationServerLLGS::Handle_QMemTags(
packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16);
if (raw_type > std::numeric_limits<uint32_t>::max())
return SendIllFormedResponse(packet, invalid_type_err);
- int32_t type = static_cast<int32_t>(raw_type);
+
+ // First narrow to 32 bits. Otherwise the copy below would get the wrong
+ // 4 bytes on big endian.
+ uint32_t raw_type_32 = raw_type;
+ int32_t type = reinterpret_cast<int32_t &>(raw_type_32);
// Tag data
if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':')
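
The endianness concern in these two hunks can be shown in isolation. A minimal standalone sketch (not part of the patch) that uses memcpy for the bit-preserving step the patch performs with reinterpret_cast:

    #include <cstdint>
    #include <cstring>

    // Reinterpret the low 32 bits of a parsed value as a signed type.
    int32_t signedFromRaw(uint64_t raw_type) {
      // Narrow first: copying 4 bytes straight out of the uint64_t would
      // take the wrong 4 bytes on a big-endian host.
      uint32_t raw_type_32 = static_cast<uint32_t>(raw_type);
      int32_t type;
      std::memcpy(&type, &raw_type_32, sizeof(type)); // bit-preserving copy
      return type;
    }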
diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp
index 252b06e269d6..0b3f7e4f3bd4 100644
--- a/lldb/source/Symbol/TypeSystem.cpp
+++ b/lldb/source/Symbol/TypeSystem.cpp
@@ -223,62 +223,32 @@ void TypeSystemMap::ForEach(std::function<bool(TypeSystem *)> const &callback) {
llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
lldb::LanguageType language,
llvm::Optional<CreateCallback> create_callback) {
- llvm::Error error = llvm::Error::success();
- assert(!error); // Check the success value when assertions are enabled
std::lock_guard<std::mutex> guard(m_mutex);
- if (m_clear_in_progress) {
- error = llvm::make_error<llvm::StringError>(
+ if (m_clear_in_progress)
+ return llvm::make_error<llvm::StringError>(
"Unable to get TypeSystem because TypeSystemMap is being cleared",
llvm::inconvertibleErrorCode());
- } else {
- collection::iterator pos = m_map.find(language);
- if (pos != m_map.end()) {
- auto *type_system = pos->second.get();
- if (type_system) {
- llvm::consumeError(std::move(error));
- return *type_system;
- }
- error = llvm::make_error<llvm::StringError>(
- "TypeSystem for language " +
- llvm::StringRef(Language::GetNameForLanguageType(language)) +
- " doesn't exist",
- llvm::inconvertibleErrorCode());
- return std::move(error);
- }
- for (const auto &pair : m_map) {
- if (pair.second && pair.second->SupportsLanguage(language)) {
- // Add a new mapping for "language" to point to an already existing
- // TypeSystem that supports this language
- m_map[language] = pair.second;
- if (pair.second.get()) {
- llvm::consumeError(std::move(error));
- return *pair.second.get();
- }
- error = llvm::make_error<llvm::StringError>(
- "TypeSystem for language " +
- llvm::StringRef(Language::GetNameForLanguageType(language)) +
- " doesn't exist",
- llvm::inconvertibleErrorCode());
- return std::move(error);
- }
- }
+ collection::iterator pos = m_map.find(language);
+ if (pos != m_map.end()) {
+ auto *type_system = pos->second.get();
+ if (type_system)
+ return *type_system;
+ return llvm::make_error<llvm::StringError>(
+ "TypeSystem for language " +
+ llvm::StringRef(Language::GetNameForLanguageType(language)) +
+ " doesn't exist",
+ llvm::inconvertibleErrorCode());
+ }
- if (!create_callback) {
- error = llvm::make_error<llvm::StringError>(
- "Unable to find type system for language " +
- llvm::StringRef(Language::GetNameForLanguageType(language)),
- llvm::inconvertibleErrorCode());
- } else {
- // Cache even if we get a shared pointer that contains a null type system
- // back
- TypeSystemSP type_system_sp = (*create_callback)();
- m_map[language] = type_system_sp;
- if (type_system_sp.get()) {
- llvm::consumeError(std::move(error));
- return *type_system_sp.get();
- }
- error = llvm::make_error<llvm::StringError>(
+ for (const auto &pair : m_map) {
+ if (pair.second && pair.second->SupportsLanguage(language)) {
+ // Add a new mapping for "language" to point to an already existing
+ // TypeSystem that supports this language
+ m_map[language] = pair.second;
+ if (pair.second.get())
+ return *pair.second.get();
+ return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
@@ -286,7 +256,23 @@ llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
}
}
- return std::move(error);
+ if (!create_callback)
+ return llvm::make_error<llvm::StringError>(
+ "Unable to find type system for language " +
+ llvm::StringRef(Language::GetNameForLanguageType(language)),
+ llvm::inconvertibleErrorCode());
+
+  // Cache the result even if the callback returned a shared pointer that
+  // holds a null type system.
+ TypeSystemSP type_system_sp = (*create_callback)();
+ m_map[language] = type_system_sp;
+ if (type_system_sp.get())
+ return *type_system_sp.get();
+ return llvm::make_error<llvm::StringError>(
+ "TypeSystem for language " +
+ llvm::StringRef(Language::GetNameForLanguageType(language)) +
+ " doesn't exist",
+ llvm::inconvertibleErrorCode());
}
llvm::Expected<TypeSystem &>
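
In miniature, the early-return style this refactor adopts (illustrative function, not from the patch): the llvm::Error is constructed exactly where the failure is detected, so no pre-made error object has to be consumed on success paths:

    #include "llvm/Support/Error.h"

    llvm::Expected<int> findOrFail(bool found) {
      if (!found)
        return llvm::make_error<llvm::StringError>(
            "not found", llvm::inconvertibleErrorCode());
      return 42; // success needs no consumeError() bookkeeping
    }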
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 90ec742f18e6..f46e66641c08 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -744,6 +744,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);
+ /// Return the minimum or maximum constant value for the specified integer
+ /// min/max flavor and type.
+ APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth);
+
/// Check if the values in \p VL are select instructions that can be converted
/// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a
/// conversion is possible, together with a bool indicating whether all select
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index 81e29d9b86e8..97aea5aedf22 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -324,6 +324,9 @@ public:
/// name is not found.
GlobalValue *getNamedValue(StringRef Name) const;
+ /// Return the number of global values in the module.
+ unsigned getNumNamedValues() const;
+
/// Return a unique non-zero ID for the specified metadata kind. This ID is
/// uniqued across modules in the current LLVMContext.
unsigned getMDKindID(StringRef Name) const;
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 08a934e6985f..c0cedb23bdcf 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1104,6 +1104,7 @@ namespace RawInstrProf {
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
// sensitive records.
// Version 6: Added binary id.
+// Version 7: Reorder binary id and include version in signature.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
template <class IntPtrT> inline uint64_t getMagic();
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 08a642469627..7d2097cfc297 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
@@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
-INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 6
+#define INSTR_PROF_RAW_VERSION 7
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */
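
For orientation, the version-7 raw header prefix now reads as sketched below (a sketch derived from the INSTR_PROF_RAW_HEADER entries above; later fields elided):

    #include <stdint.h>

    /* Sketch of the version-7 raw profile header prefix. */
    struct RawProfHeaderPrefix {
      uint64_t Magic;
      uint64_t Version;
      uint64_t BinaryIdsSize; /* moved ahead of DataSize in version 7 */
      uint64_t DataSize;
      /* ... remaining fields in the order listed above ... */
    };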
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index c93b8adcc890..c3c12fd23746 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1855,6 +1855,10 @@ public:
///
static void createShallowWrapper(Function &F);
+  /// Returns true if the function \p F can be internalized, i.e., it has a
+  /// compatible linkage.
+ static bool isInternalizable(Function &F);
+
/// Make another copy of the function \p F such that the copied version has
/// internal linkage afterwards and can be analysed. Then we replace all uses
/// of the original function to the copied one
@@ -1870,6 +1874,22 @@ public:
/// null pointer.
static Function *internalizeFunction(Function &F, bool Force = false);
+ /// Make copies of each function in the set \p FnSet such that the copied
+ /// version has internal linkage afterwards and can be analysed. Then we
+ /// replace all uses of the original function to the copied one. The map
+ /// \p FnMap contains a mapping of functions to their internalized versions.
+ ///
+ /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
+ /// linkage can be internalized because these linkages guarantee that other
+ /// definitions with the same name have the same semantics as this one.
+ ///
+  /// This version internalizes all the functions in the set \p FnSet at once
+  /// and only then replaces the uses. This prevents internalized functions
+  /// from being called by external functions when an internalized version
+  /// exists in the module.
+ static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+ DenseMap<Function *, Function *> &FnMap);
+
/// Return the data layout associated with the anchor scope.
const DataLayout &getDataLayout() const { return InfoCache.DL; }
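
A minimal usage sketch for the batch API (hypothetical caller; F and G are assumed to be linkonce_odr or weak_odr functions already in the module):

    // Internalize two functions together, then look up the copies.
    static void internalizePair(Function *F, Function *G) {
      SmallPtrSet<Function *, 4> FnSet = {F, G};
      DenseMap<Function *, Function *> FnMap;
      if (Attributor::internalizeFunctions(FnSet, FnMap)) {
        // Each original now maps to its internal-linkage copy.
        Function *InternalF = FnMap.lookup(F);
        (void)InternalF;
      }
    }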
diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index c4030735d965..c922476ac79d 100644
--- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -51,11 +51,13 @@
#define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -176,7 +178,7 @@ public:
class PredicateInfo {
public:
PredicateInfo(Function &, DominatorTree &, AssumptionCache &);
- ~PredicateInfo() = default;
+ ~PredicateInfo();
void verifyPredicateInfo() const;
@@ -203,6 +205,8 @@ private:
// the Predicate Info, they belong to the ValueInfo structs in the ValueInfos
// vector.
DenseMap<const Value *, const PredicateBase *> PredicateMap;
+ // The set of ssa_copy declarations we created with our custom mangling.
+ SmallSet<AssertingVH<Function>, 20> CreatedDeclarations;
};
// This pass does eager building and then printing of PredicateInfo. It is used
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 8662dbf385dc..59bf3a342caa 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -83,6 +83,9 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
/// InsertedValues/InsertedPostIncValues.
SmallPtrSet<Value *, 16> ReusedValues;
+ // The induction variables generated.
+ SmallVector<WeakVH, 2> InsertedIVs;
+
/// A memoization of the "relevant" loop for a given SCEV.
DenseMap<const SCEV *, const Loop *> RelevantLoops;
@@ -199,9 +202,11 @@ public:
InsertedPostIncValues.clear();
ReusedValues.clear();
ChainedPhis.clear();
+ InsertedIVs.clear();
}
ScalarEvolution *getSE() { return &SE; }
+ const SmallVectorImpl<WeakVH> &getInsertedIVs() const { return InsertedIVs; }
/// Return a vector containing all instructions inserted during expansion.
SmallVector<Instruction *, 32> getAllInsertedInstructions() const {
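
A consumer-side sketch (Expander is assumed to be an in-scope SCEVExpander): the entries are WeakVH, so an IV that was deleted after expansion shows up as null and must be skipped:

    unsigned LiveIVs = 0;
    for (const WeakVH &VH : Expander.getInsertedIVs())
      if (isa_and_nonnull<PHINode>(VH)) // skip IVs deleted since expansion
        ++LiveIVs;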
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 23083bc8178e..69ab0052b0a7 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4080,6 +4080,22 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
std::swap(TrueVal, FalseVal);
}
+ // Check for integer min/max with a limit constant:
+ // X > MIN_INT ? X : MIN_INT --> X
+ // X < MAX_INT ? X : MAX_INT --> X
+ if (TrueVal->getType()->isIntOrIntVectorTy()) {
+ Value *X, *Y;
+ SelectPatternFlavor SPF =
+ matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal,
+ X, Y).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) {
+ APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF),
+ X->getType()->getScalarSizeInBits());
+ if (match(Y, m_SpecificInt(LimitC)))
+ return X;
+ }
+ }
+
if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) {
Value *X;
const APInt *Y;
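
The scalar shape of the new fold, written out in C++ (illustrative example): when the compare is false, the two select arms are equal, so the select always yields X:

    #include <climits>

    // x > INT_MIN ? x : INT_MIN is x for every x: if the compare is false,
    // x equals INT_MIN, which is also the false arm. The same holds for
    // x < INT_MAX with INT_MAX, and for the unsigned limits.
    int smax_with_limit(int x) { return x > INT_MIN ? x : INT_MIN; }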
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 522d21812c6a..6e3ca5c4e08a 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6253,6 +6253,16 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}
+APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
+ switch (SPF) {
+ case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
+ case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
+ case SPF_UMAX: return APInt::getMaxValue(BitWidth);
+ case SPF_UMIN: return APInt::getMinValue(BitWidth);
+ default: llvm_unreachable("Unexpected flavor");
+ }
+}
+
std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
// Check if VL contains select instructions that can be folded into a min/max
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1bba7232eb14..4f730b2cf372 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20560,8 +20560,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
- ExtIdx * NVT.getScalarSizeInBits())
+ ExtIdx * NVT.getScalarSizeInBits()) {
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
+ return SDValue();
+
return DAG.getBitcast(NVT, V.getOperand(1));
+ }
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index add34eccc1f3..de096f95afcb 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
}
if (Retain) {
- if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
+ if ((Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
+ !TM.getTargetTriple().isOSSolaris())
Flags |= ELF::SHF_GNU_RETAIN;
return NextUniqueID++;
}
@@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal(
EmitUniqueSection = true;
Flags |= ELF::SHF_LINK_ORDER;
}
- if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) {
+ if (Retain &&
+ (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
+ !TM.getTargetTriple().isOSSolaris()) {
EmitUniqueSection = true;
Flags |= ELF::SHF_GNU_RETAIN;
}
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 5f05aa2e94e7..e1e28d1230b0 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -349,200 +349,6 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
}
}
-/// Wrapper around getFoldedSizeOfImpl() that adds caching.
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache);
-
-/// Return a ConstantExpr with type DestTy for sizeof on Ty, with any known
-/// factors factored out. If Folded is false, return null if no factoring was
-/// possible, to avoid endlessly bouncing an unfoldable expression back into the
-/// top-level folder.
-static Constant *getFoldedSizeOfImpl(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache) {
- // This is the actual implementation of getFoldedSizeOf(). To get the caching
- // behavior, we need to call getFoldedSizeOf() when we recurse.
-
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true, Cache);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has size zero.
- if (NumElems == 0)
- return ConstantExpr::getNullValue(DestTy);
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true, Cache);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true, Cache)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantInt::get(DestTy, NumElems);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // Pointer size doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return getFoldedSizeOf(
- PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace()),
- DestTy, true, Cache);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular sizeof expression.
- Constant *C = ConstantExpr::getSizeOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache) {
- // Check for previously generated folded size constant.
- auto It = Cache.find(Ty);
- if (It != Cache.end())
- return It->second;
- return Cache[Ty] = getFoldedSizeOfImpl(Ty, DestTy, Folded, Cache);
-}
-
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) {
- DenseMap<Type *, Constant *> Cache;
- return getFoldedSizeOf(Ty, DestTy, Folded, Cache);
-}
-
-/// Return a ConstantExpr with type DestTy for alignof on Ty, with any known
-/// factors factored out. If Folded is false, return null if no factoring was
-/// possible, to avoid endlessly bouncing an unfoldable expression back into the
-/// top-level folder.
-static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) {
- // The alignment of an array is equal to the alignment of the
- // array element. Note that this is not always true for vectors.
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy,
- false),
- C, DestTy);
- return C;
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- // Packed structs always have an alignment of 1.
- if (STy->isPacked())
- return ConstantInt::get(DestTy, 1);
-
- // Otherwise, struct alignment is the maximum alignment of any member.
- // Without target data, we can't compare much, but we can check to see
- // if all the members have the same alignment.
- unsigned NumElems = STy->getNumElements();
- // An empty struct has minimal alignment.
- if (NumElems == 0)
- return ConstantInt::get(DestTy, 1);
- // Check for a struct with all members having the same alignment.
- Constant *MemberAlign =
- getFoldedAlignOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberAlign;
- }
-
- // Pointer alignment doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return
- getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
- 1),
- PTy->getAddressSpace()),
- DestTy, true);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular alignof expression.
- Constant *C = ConstantExpr::getAlignOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
-/// Return a ConstantExpr with type DestTy for offsetof on Ty and FieldNo, with
-/// any known factors factored out. If Folded is false, return null if no
-/// factoring was possible, to avoid endlessly bouncing an unfoldable expression
-/// back into the top-level folder.
-static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy,
- bool Folded) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
- DestTy, false),
- FieldNo, DestTy);
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has no members.
- if (NumElems == 0)
- return nullptr;
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
- false,
- DestTy,
- false),
- FieldNo, DestTy);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular offsetof expression.
- Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Type *DestTy) {
if (isa<PoisonValue>(V))
@@ -666,53 +472,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// Is it a null pointer value?
if (V->isNullValue())
return ConstantInt::get(DestTy, 0);
- // If this is a sizeof-like expression, pull out multiplications by
- // known factors to expose them to subsequent folding. If it's an
- // alignof-like expression, factor out known factors.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- CE->getOperand(0)->isNullValue()) {
- // FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and
- // getFoldedAlignOf() don't handle the case when DestTy is a vector of
- // pointers yet. We end up in asserts in CastInst::getCastOpcode (see
- // test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this
- // happen in one "real" C-code test case, so it does not seem to be an
- // important optimization to handle vectors here. For now, simply bail
- // out.
- if (DestTy->isVectorTy())
- return nullptr;
- GEPOperator *GEPO = cast<GEPOperator>(CE);
- Type *Ty = GEPO->getSourceElementType();
- if (CE->getNumOperands() == 2) {
- // Handle a sizeof-like expression.
- Constant *Idx = CE->getOperand(1);
- bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
- if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
- Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
- DestTy, false),
- Idx, DestTy);
- return ConstantExpr::getMul(C, Idx);
- }
- } else if (CE->getNumOperands() == 3 &&
- CE->getOperand(1)->isNullValue()) {
- // Handle an alignof-like expression.
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
- if (CI->isOne() &&
- STy->getNumElements() == 2 &&
- STy->getElementType(0)->isIntegerTy(1)) {
- return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
- }
- }
- // Handle an offsetof-like expression.
- if (Ty->isStructTy() || Ty->isArrayTy()) {
- if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
- DestTy, false))
- return C;
- }
- }
- }
// Other pointer types cannot be casted
return nullptr;
case Instruction::UIToFP:
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 7c18dc0ed299..63ea41fba89a 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -114,6 +114,10 @@ GlobalValue *Module::getNamedValue(StringRef Name) const {
return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
}
+unsigned Module::getNumNamedValues() const {
+ return getValueSymbolTable().size();
+}
+
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
/// This ID is uniqued across modules in the current LLVMContext.
unsigned Module::getMDKindID(StringRef Name) const {
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 8a4470ae207d..a0460062f307 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -366,6 +366,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
if (GET_VERSION(Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);
+ BinaryIdsSize = swap(Header.BinaryIdsSize);
CountersDelta = swap(Header.CountersDelta);
NamesDelta = swap(Header.NamesDelta);
auto DataSize = swap(Header.DataSize);
@@ -374,7 +375,6 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
NamesSize = swap(Header.NamesSize);
ValueKindLast = swap(Header.ValueKindLast);
- BinaryIdsSize = swap(Header.BinaryIdsSize);
auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
auto PaddingSize = getNumPaddingBytes(NamesSize);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ca6b87a5ebb0..b27a02b8c182 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4353,8 +4353,13 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
+ IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
+ MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ } else {
+ MemVT = getContainerForFixedLengthVector(DAG, MemVT);
+ IndexVT = MemVT.changeTypeToInteger();
+ }
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
Mask = DAG.getNode(
ISD::ZERO_EXTEND, DL,
@@ -4453,8 +4458,13 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
+ IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
+ MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ } else {
+ MemVT = getContainerForFixedLengthVector(DAG, MemVT);
+ IndexVT = MemVT.changeTypeToInteger();
+ }
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
StoreVal =
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b03d421d3e6d..091a62aa4ada 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
if (!MI.getOperand(1).isReg())
return false;
+ auto NormalizeCmpValue = [](int64_t Value) -> int {
+ // Comparison immediates may be 64-bit, but CmpValue is only an int.
+ // Normalize to 0/1/2 return value, where 2 indicates any value apart from
+ // 0 or 1.
+ // TODO: Switch CmpValue to int64_t in the API to avoid this.
+ if (Value == 0 || Value == 1)
+ return Value;
+ return 2;
+ };
+
switch (MI.getOpcode()) {
default:
break;
@@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME: In order to convert CmpValue to 0 or 1
- CmpValue = MI.getOperand(2).getImm() != 0;
+ CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
- // while the type of CmpValue is int. When converting uint64_t to int,
- // the high 32 bits of uint64_t will be lost.
- // In fact it causes a bug in spec2006-483.xalancbmk
- // CmpValue is only used to compare with zero in OptimizeCompareInstr
- CmpValue = AArch64_AM::decodeLogicalImmediate(
+ CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
- MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+ MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
return true;
}
@@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
- // Continue only if we have a "ri" where immediate is zero.
- // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
- // function.
- assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
+ // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
+ assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
+ "CmpValue must be 0, 1, or 2!");
if (SrcReg2 != 0)
return false;
@@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
- if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+ if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
- return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+ return (CmpValue == 0 || CmpValue == 1) &&
+ removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
/// Get opcode of S version of Instr.
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2167ad5d7467..e68a3aa8bf47 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1647,7 +1647,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
"CMP_SWAP not expected to be custom expanded for Thumb1");
assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
"ARMv8-M.baseline does not have t2UXTB/t2UXTH");
- assert(ARM::tGPRRegClass.contains(DesiredReg) &&
+ assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) &&
"DesiredReg used for UXT op must be tGPR");
}
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
index 3bc5556a62f4..417e8b6ffec3 100644
--- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -54,6 +54,24 @@ public:
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
}
+
+ InstructionCost getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput)
+ return SCEVCheapExpansionBudget.getValue() + 1;
+
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info, Opd1PropInfo,
+ Opd2PropInfo);
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index d5a7873bd056..abf5b213bbac 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
return true;
+ // There is no corresponding FMA instruction for PPC double double.
+ // Thus, we need to disable CTR loop generation for this type.
+ case Intrinsic::fmuladd:
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 342497150d49..8af3c8f5cfdb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -78,6 +78,39 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
}
//===----------------------------------------------------------------------===//
+// Scheduling definitions.
+//===----------------------------------------------------------------------===//
+
+class VMVRSched<int n>: Sched <[!cast<SchedReadWrite>("WriteVMov" # n # "V"),
+ !cast<SchedReadWrite>("ReadVMov" # n # "V")]>;
+
+class VLESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDE" # n),
+ ReadVLDX, ReadVMask]>;
+
+class VSESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTE" # n),
+ !cast<SchedReadWrite>("ReadVSTE" # n # "V"),
+ ReadVSTX, ReadVMask]>;
+
+class VLSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDS" # n),
+ ReadVLDX, ReadVLDSX, ReadVMask]>;
+
+class VSSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTS" # n),
+ !cast<SchedReadWrite>("ReadVSTS" # n # "V"),
+ ReadVSTX, ReadVSTSX, ReadVMask]>;
+
+class VLXSched<int n, string o> :
+ Sched <[!cast<SchedReadWrite>("WriteVLD" # o # "X" # n),
+ ReadVLDX, !cast<SchedReadWrite>("ReadVLD" # o # "XV"), ReadVMask]>;
+
+class VSXSched<int n, string o> :
+ Sched <[!cast<SchedReadWrite>("WriteVST" # o # "X" # n),
+ !cast<SchedReadWrite>("ReadVST" # o # "X" # n),
+ ReadVSTX, !cast<SchedReadWrite>("ReadVST" # o # "XV"), ReadVMask]>;
+
+class VLFSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDFF" # n),
+ ReadVLDX, ReadVMask]>;
+
+//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
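
The Sched classes above rely on TableGen's string paste operator (#) and !cast to look up width-specialized SchedReadWrite records by name; the records themselves are declared in the new RISCVScheduleV.td. Folding a few instances by hand (illustrative, not part of the patch):

// After !cast resolution these are ordinary Sched lists, e.g.:
//   VLESched<32>      ==> Sched<[WriteVLDE32, ReadVLDX, ReadVMask]>
//   VSESched<64>      ==> Sched<[WriteVSTE64, ReadVSTE64V, ReadVSTX, ReadVMask]>
//   VLXSched<16, "O"> ==> Sched<[WriteVLDOX16, ReadVLDX, ReadVLDOXV, ReadVMask]>
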
@@ -328,106 +361,417 @@ class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr>
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
}
multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
}
-multiclass VALUr_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
}
-multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>;
}
-multiclass VALU_IV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">;
+multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>;
}
-multiclass VALUr_IV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
}
-multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
+}
+
+multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
+}
+
+multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+ def IM : VALUmVI<funct6, opcodestr # ".vim">,
+ Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
}
-multiclass VALU_MV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">;
+multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
+ def IM : VALUmVI<funct6, opcodestr # ".vim">,
+ Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
}
-multiclass VALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
- def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">;
+multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
}
-multiclass VALU_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
+ def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>,
+ Sched<[WriteVICALUI, ReadVIALUCV]>;
}
-multiclass VALUr_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
}
-multiclass VALUr_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
}
-multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
- def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>;
+multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
}
-multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
- def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
- def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
- def IM : VALUmVI<funct6, opcodestr # ".vim">;
+multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
}
-multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
- def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
- def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
+multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>;
}
-multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
- def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
- def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
- def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>;
+multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
}
-multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
- def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
- def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
+multiclass VRDIV_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
}
-multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>;
}
-multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>;
+}
+
+multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>;
+}
+
+multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
+}
+
+multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
+}
+
+multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VCMP_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>;
+}
+
+multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
+}
+
+multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>;
+}
+
+multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
+}
+
+multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>;
+}
+
+multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>;
+}
+
+multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>;
+}
+
+multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>;
+}
+
+multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>;
+}
+
+multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>;
+}
+
+multiclass VRED_MV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
+ Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV0, ReadVMask]>;
+}
+
+multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">,
+ Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV0, ReadVMask]>;
+}
+
+multiclass VRED_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV0, ReadVMask]>;
+}
+
+multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV0, ReadVMask]>;
+}
+
+multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV0, ReadVMask]>;
+}
+
+multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFWRedOV, ReadVFWRedOV, ReadVFWRedOV0, ReadVMask]>;
}
-multiclass VALUr_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
- def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
+ def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
+ Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
}
-multiclass VALU_FV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">;
+multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>;
}
-multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
- def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>;
+multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
+}
+
+multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>;
+}
+
+multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+}
+
+multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>;
+}
+
+multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>;
+}
+
+multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>;
+}
+
+multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>;
+}
+
+multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+}
+
+multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>;
+}
+
+multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>;
+}
+
+multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>;
+}
+
+multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>;
+}
+
+multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVISlideI, ReadVISlideV, ReadVMask]>;
+}
+
+multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+}
+
+multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>;
+}
+
+multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>;
+}
+
+multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
+ def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
+ Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>;
}
multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
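
The renamed multiclasses expand to the same instruction encodings as the old VALU* ones; the change is the per-variant Sched mixin, with the multiclasses regrouped by scheduling category. As a sketch, `defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;` now yields records equivalent to (with the defaults optype = simm5, vw = "v"):

def VADD_VV : VALUVV<0b000000, OPIVV, "vadd.vv">,
              Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
def VADD_VX : VALUVX<0b000000, OPIVX, "vadd.vx">,
              Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
def VADD_VI : VALUVI<0b000000, "vadd.vi", simm5>,
              Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
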
@@ -435,11 +779,48 @@ multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
def _UNWD : VAMONoWd<amoop, width, opcodestr>;
}
-multiclass VWholeLoad<bits<3> nf, string opcodestr, RegisterClass VRC> {
- def E8_V : VWholeLoad<nf, LSWidth8, opcodestr # "e8.v", VRC>;
- def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v", VRC>;
- def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v", VRC>;
- def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>;
+multiclass VWholeLoad1<string opcodestr, RegisterClass VRC> {
+ def E8_V : VWholeLoad<0, LSWidth8, opcodestr # "e8.v", VRC>,
+ Sched<[WriteVLD1R8, ReadVLDX]>;
+ def E16_V : VWholeLoad<0, LSWidth16, opcodestr # "e16.v", VRC>,
+ Sched<[WriteVLD1R16, ReadVLDX]>;
+ def E32_V : VWholeLoad<0, LSWidth32, opcodestr # "e32.v", VRC>,
+ Sched<[WriteVLD1R32, ReadVLDX]>;
+ def E64_V : VWholeLoad<0, LSWidth64, opcodestr # "e64.v", VRC>,
+ Sched<[WriteVLD1R64, ReadVLDX]>;
+}
+
+multiclass VWholeLoad2<string opcodestr, RegisterClass VRC> {
+ def E8_V : VWholeLoad<1, LSWidth8, opcodestr # "e8.v", VRC>,
+ Sched<[WriteVLD2R8, ReadVLDX]>;
+ def E16_V : VWholeLoad<1, LSWidth16, opcodestr # "e16.v", VRC>,
+ Sched<[WriteVLD2R16, ReadVLDX]>;
+ def E32_V : VWholeLoad<1, LSWidth32, opcodestr # "e32.v", VRC>,
+ Sched<[WriteVLD2R32, ReadVLDX]>;
+ def E64_V : VWholeLoad<1, LSWidth64, opcodestr # "e64.v", VRC>,
+ Sched<[WriteVLD2R64, ReadVLDX]>;
+}
+
+multiclass VWholeLoad4<string opcodestr, RegisterClass VRC> {
+ def E8_V : VWholeLoad<3, LSWidth8, opcodestr # "e8.v", VRC>,
+ Sched<[WriteVLD4R8, ReadVLDX]>;
+ def E16_V : VWholeLoad<3, LSWidth16, opcodestr # "e16.v", VRC>,
+ Sched<[WriteVLD4R16, ReadVLDX]>;
+ def E32_V : VWholeLoad<3, LSWidth32, opcodestr # "e32.v", VRC>,
+ Sched<[WriteVLD4R32, ReadVLDX]>;
+ def E64_V : VWholeLoad<3, LSWidth64, opcodestr # "e64.v", VRC>,
+ Sched<[WriteVLD4R64, ReadVLDX]>;
+}
+
+multiclass VWholeLoad8<string opcodestr, RegisterClass VRC> {
+ def E8_V : VWholeLoad<7, LSWidth8, opcodestr # "e8.v", VRC>,
+ Sched<[WriteVLD8R8, ReadVLDX]>;
+ def E16_V : VWholeLoad<7, LSWidth16, opcodestr # "e16.v", VRC>,
+ Sched<[WriteVLD8R16, ReadVLDX]>;
+ def E32_V : VWholeLoad<7, LSWidth32, opcodestr # "e32.v", VRC>,
+ Sched<[WriteVLD8R32, ReadVLDX]>;
+ def E64_V : VWholeLoad<7, LSWidth64, opcodestr # "e64.v", VRC>,
+ Sched<[WriteVLD8R64, ReadVLDX]>;
}
//===----------------------------------------------------------------------===//
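
VWholeLoad is split into four near-identical multiclasses, apparently because the write resource names encode the register-group count (VLD1R*, VLD2R*, VLD4R*, VLD8R*), which the old `bits<3> nf` parameter did not map to cleanly. Expansion sketch for one of them (illustrative only):

// `defm VL2R : VWholeLoad2<"vl2r", VRM2>;` produces four records, e.g.:
//   def VL2RE16_V : VWholeLoad<1, LSWidth16, "vl2re16.v", VRM2>,
//                   Sched<[WriteVLD2R16, ReadVLDX]>;
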
@@ -459,69 +840,94 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
// Vector Unit-Stride Instructions
-def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">;
-def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">;
-def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">;
-def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">;
-
-def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">;
-def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">;
-def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">;
-def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">;
-
-def VLE1_V : VUnitStrideLoadMask<"vle1.v">;
-def VSE1_V : VUnitStrideStoreMask<"vse1.v">;
-
-def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">;
-def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">;
-def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">;
-def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">;
+def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">,
+ VLESched<8>;
+def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">,
+ VLESched<16>;
+def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">,
+ VLESched<32>;
+def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">,
+ VLESched<64>;
+
+def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">,
+ VLFSched<8>;
+def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">,
+ VLFSched<16>;
+def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">,
+ VLFSched<32>;
+def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">,
+ VLFSched<64>;
+
+def VLE1_V : VUnitStrideLoadMask<"vle1.v">,
+ Sched<[WriteVLDM, ReadVLDX]>;
+def VSE1_V : VUnitStrideStoreMask<"vse1.v">,
+ Sched<[WriteVSTM, ReadVSTM, ReadVSTX]>;
+
+def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">,
+ VSESched<8>;
+def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">,
+ VSESched<16>;
+def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">,
+ VSESched<32>;
+def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">,
+ VSESched<64>;
// Vector Strided Instructions
-def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">;
-def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">;
-def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">;
-def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">;
-
-def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">;
-def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">;
-def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">;
-def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">;
+def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">,
+ VLSSched<8>;
+def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">,
+ VLSSched<16>;
+def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">,
+ VLSSched<32>;
+def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">,
+ VLSSched<64>;
+
+def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">,
+ VSSSched<8>;
+def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">,
+ VSSSched<16>;
+def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">,
+ VSSSched<32>;
+def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">,
+ VSSSched<64>;
// Vector Indexed Instructions
-def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">;
-def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">;
-def VLUXEI32_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth32, "vluxei32.v">;
-def VLUXEI64_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth64, "vluxei64.v">;
-
-def VLOXEI8_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth8, "vloxei8.v">;
-def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">;
-def VLOXEI32_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth32, "vloxei32.v">;
-def VLOXEI64_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth64, "vloxei64.v">;
-
-def VSUXEI8_V : VIndexedStore<MOPSTIndexedUnord, LSWidth8, "vsuxei8.v">;
-def VSUXEI16_V : VIndexedStore<MOPSTIndexedUnord, LSWidth16, "vsuxei16.v">;
-def VSUXEI32_V : VIndexedStore<MOPSTIndexedUnord, LSWidth32, "vsuxei32.v">;
-def VSUXEI64_V : VIndexedStore<MOPSTIndexedUnord, LSWidth64, "vsuxei64.v">;
-
-def VSOXEI8_V : VIndexedStore<MOPSTIndexedOrder, LSWidth8, "vsoxei8.v">;
-def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
-def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
-def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
-
-defm VL1R : VWholeLoad<0, "vl1r", VR>;
-defm VL2R : VWholeLoad<1, "vl2r", VRM2>;
-defm VL4R : VWholeLoad<3, "vl4r", VRM4>;
-defm VL8R : VWholeLoad<7, "vl8r", VRM8>;
+foreach n = [8, 16, 32, 64] in {
+defvar w = !cast<RISCVWidth>("LSWidth" # n);
+
+def VLUXEI # n # _V :
+ VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">,
+ VLXSched<n, "U">;
+def VLOXEI # n # _V :
+ VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">,
+ VLXSched<n, "O">;
+
+def VSUXEI # n # _V :
+ VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">,
+ VSXSched<n, "U">;
+def VSOXEI # n # _V :
+ VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">,
+ VSXSched<n, "O">;
+}
+
+defm VL1R : VWholeLoad1<"vl1r", VR>;
+defm VL2R : VWholeLoad2<"vl2r", VRM2>;
+defm VL4R : VWholeLoad4<"vl4r", VRM4>;
+defm VL8R : VWholeLoad8<"vl8r", VRM8>;
+
def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>;
def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>;
def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
-def VS1R_V : VWholeStore<0, "vs1r.v", VR>;
-def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>;
-def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>;
-def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>;
+def VS1R_V : VWholeStore<0, "vs1r.v", VR>,
+ Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
+def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>,
+ Sched<[WriteVST2R, ReadVST2R, ReadVSTX]>;
+def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>,
+ Sched<[WriteVST4R, ReadVST4R, ReadVSTX]>;
+def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>,
+ Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>;
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
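
The foreach/defvar loop above folds sixteen hand-written indexed load/store defs into one body; the paste operator builds both the record name and the mnemonic from the loop index. One unrolled iteration (n = 8, unordered load), for reference:

def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">,
                VLXSched<8, "U">;
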
@@ -588,9 +994,9 @@ def : InstAlias<"vnot.v $vd, $vs$vm",
(VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>;
// Vector Single-Width Bit Shift Instructions
-defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>;
-defm VSRL_V : VALU_IV_V_X_I<"vsrl", 0b101000, uimm5>;
-defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
+defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>;
+defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000, uimm5>;
+defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>;
// Vector Narrowing Integer Right Shift Instructions
// Refer to 11.3. Narrowing Vector Arithmetic Instructions
@@ -598,8 +1004,8 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
// vector register group (specified by vs2). The destination vector register
// group cannot overlap the mask register if used, unless LMUL=1.
let Constraints = "@earlyclobber $vd" in {
-defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
-defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
+defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
+defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
} // Constraints = "@earlyclobber $vd"
def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
@@ -607,14 +1013,14 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
// Vector Integer Comparison Instructions
let RVVConstraint = NoConstraint in {
-defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>;
-defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>;
-defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>;
-defm VMSLT_V : VALU_IV_V_X<"vmslt", 0b011011>;
-defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>;
-defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>;
-defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>;
-defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>;
+defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>;
+defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>;
+defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>;
+defm VMSLT_V : VCMP_IV_V_X<"vmslt", 0b011011>;
+defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>;
+defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>;
+defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>;
+defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>;
} // RVVConstraint = NoConstraint
def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm",
@@ -672,84 +1078,87 @@ def PseudoVMSGE_VX_M_T : Pseudo<(outs VR:$vd, VRNoV0:$scratch),
}
// Vector Integer Min/Max Instructions
-defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>;
-defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>;
-defm VMAXU_V : VALU_IV_V_X<"vmaxu", 0b000110>;
-defm VMAX_V : VALU_IV_V_X<"vmax", 0b000111>;
+defm VMINU_V : VCMP_IV_V_X<"vminu", 0b000100>;
+defm VMIN_V : VCMP_IV_V_X<"vmin", 0b000101>;
+defm VMAXU_V : VCMP_IV_V_X<"vmaxu", 0b000110>;
+defm VMAX_V : VCMP_IV_V_X<"vmax", 0b000111>;
// Vector Single-Width Integer Multiply Instructions
-defm VMUL_V : VALU_MV_V_X<"vmul", 0b100101>;
-defm VMULH_V : VALU_MV_V_X<"vmulh", 0b100111>;
-defm VMULHU_V : VALU_MV_V_X<"vmulhu", 0b100100>;
-defm VMULHSU_V : VALU_MV_V_X<"vmulhsu", 0b100110>;
+defm VMUL_V : VMUL_MV_V_X<"vmul", 0b100101>;
+defm VMULH_V : VMUL_MV_V_X<"vmulh", 0b100111>;
+defm VMULHU_V : VMUL_MV_V_X<"vmulhu", 0b100100>;
+defm VMULHSU_V : VMUL_MV_V_X<"vmulhsu", 0b100110>;
// Vector Integer Divide Instructions
-defm VDIVU_V : VALU_MV_V_X<"vdivu", 0b100000>;
-defm VDIV_V : VALU_MV_V_X<"vdiv", 0b100001>;
-defm VREMU_V : VALU_MV_V_X<"vremu", 0b100010>;
-defm VREM_V : VALU_MV_V_X<"vrem", 0b100011>;
+defm VDIVU_V : VDIV_MV_V_X<"vdivu", 0b100000>;
+defm VDIV_V : VDIV_MV_V_X<"vdiv", 0b100001>;
+defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>;
+defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>;
// Vector Widening Integer Multiply Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VWMUL_V : VALU_MV_V_X<"vwmul", 0b111011>;
-defm VWMULU_V : VALU_MV_V_X<"vwmulu", 0b111000>;
-defm VWMULSU_V : VALU_MV_V_X<"vwmulsu", 0b111010>;
+defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>;
+defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>;
+defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Single-Width Integer Multiply-Add Instructions
-defm VMACC_V : VALUr_MV_V_X<"vmacc", 0b101101>;
-defm VNMSAC_V : VALUr_MV_V_X<"vnmsac", 0b101111>;
-defm VMADD_V : VALUr_MV_V_X<"vmadd", 0b101001>;
-defm VNMSUB_V : VALUr_MV_V_X<"vnmsub", 0b101011>;
+defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>;
+defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>;
+defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
+defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
// Vector Widening Integer Multiply-Add Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VWMACCU_V : VALUr_MV_V_X<"vwmaccu", 0b111100>;
-defm VWMACC_V : VALUr_MV_V_X<"vwmacc", 0b111101>;
-defm VWMACCSU_V : VALUr_MV_V_X<"vwmaccsu", 0b111111>;
-defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>;
+defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
+defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>;
+defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>;
+defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Integer Merge Instructions
-defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>;
+defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>;
// Vector Integer Move Instructions
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1,
RVVConstraint = NoConstraint in {
// op vd, vs1
def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd),
- (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">;
+ (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">,
+ Sched<[WriteVIMovV, ReadVIMovV]>;
// op vd, rs1
def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd),
- (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">;
+ (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">,
+ Sched<[WriteVIMovX, ReadVIMovX]>;
// op vd, imm
def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd),
- (ins simm5:$imm), "vmv.v.i", "$vd, $imm">;
+ (ins simm5:$imm), "vmv.v.i", "$vd, $imm">,
+ Sched<[WriteVIMovI]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Vector Fixed-Point Arithmetic Instructions
-defm VSADDU_V : VALU_IV_V_X_I<"vsaddu", 0b100000>;
-defm VSADD_V : VALU_IV_V_X_I<"vsadd", 0b100001>;
-defm VSSUBU_V : VALU_IV_V_X<"vssubu", 0b100010>;
-defm VSSUB_V : VALU_IV_V_X<"vssub", 0b100011>;
+defm VSADDU_V : VSALU_IV_V_X_I<"vsaddu", 0b100000>;
+defm VSADD_V : VSALU_IV_V_X_I<"vsadd", 0b100001>;
+defm VSSUBU_V : VSALU_IV_V_X<"vssubu", 0b100010>;
+defm VSSUB_V : VSALU_IV_V_X<"vssub", 0b100011>;
// Vector Single-Width Averaging Add and Subtract
-defm VAADDU_V : VALU_MV_V_X<"vaaddu", 0b001000>;
-defm VAADD_V : VALU_MV_V_X<"vaadd", 0b001001>;
-defm VASUBU_V : VALU_MV_V_X<"vasubu", 0b001010>;
-defm VASUB_V : VALU_MV_V_X<"vasub", 0b001011>;
+defm VAADDU_V : VAALU_MV_V_X<"vaaddu", 0b001000>;
+defm VAADD_V : VAALU_MV_V_X<"vaadd", 0b001001>;
+defm VASUBU_V : VAALU_MV_V_X<"vasubu", 0b001010>;
+defm VASUB_V : VAALU_MV_V_X<"vasub", 0b001011>;
// Vector Single-Width Fractional Multiply with Rounding and Saturation
-defm VSMUL_V : VALU_IV_V_X<"vsmul", 0b100111>;
+defm VSMUL_V : VSMUL_IV_V_X<"vsmul", 0b100111>;
// Vector Single-Width Scaling Shift Instructions
-defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>;
-defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>;
+defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010, uimm5>;
+defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011, uimm5>;
// Vector Narrowing Fixed-Point Clip Instructions
let Constraints = "@earlyclobber $vd" in {
-defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
-defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
+defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
+defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
} // Constraints = "@earlyclobber $vd"
} // Predicates = [HasStdExtV]
@@ -762,60 +1171,60 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>;
// Vector Widening Floating-Point Add/Subtract Instructions
let Constraints = "@earlyclobber $vd" in {
let RVVConstraint = WidenV in {
-defm VFWADD_V : VALU_FV_V_F<"vfwadd", 0b110000>;
-defm VFWSUB_V : VALU_FV_V_F<"vfwsub", 0b110010>;
+defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>;
+defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>;
} // RVVConstraint = WidenV
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
let RVVConstraint = WidenW in {
-defm VFWADD_W : VALU_FV_V_F<"vfwadd", 0b110100, "w">;
-defm VFWSUB_W : VALU_FV_V_F<"vfwsub", 0b110110, "w">;
+defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">;
+defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">;
} // RVVConstraint = WidenW
} // Constraints = "@earlyclobber $vd"
// Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm VFMUL_V : VALU_FV_V_F<"vfmul", 0b100100>;
-defm VFDIV_V : VALU_FV_V_F<"vfdiv", 0b100000>;
-defm VFRDIV_V : VALU_FV_F<"vfrdiv", 0b100001>;
+defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>;
+defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>;
+defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>;
// Vector Widening Floating-Point Multiply
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VFWMUL_V : VALU_FV_V_F<"vfwmul", 0b111000>;
+defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
-defm VFMACC_V : VALUr_FV_V_F<"vfmacc", 0b101100>;
-defm VFNMACC_V : VALUr_FV_V_F<"vfnmacc", 0b101101>;
-defm VFMSAC_V : VALUr_FV_V_F<"vfmsac", 0b101110>;
-defm VFNMSAC_V : VALUr_FV_V_F<"vfnmsac", 0b101111>;
-defm VFMADD_V : VALUr_FV_V_F<"vfmadd", 0b101000>;
-defm VFNMADD_V : VALUr_FV_V_F<"vfnmadd", 0b101001>;
-defm VFMSUB_V : VALUr_FV_V_F<"vfmsub", 0b101010>;
-defm VFNMSUB_V : VALUr_FV_V_F<"vfnmsub", 0b101011>;
+defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
+defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
+defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
+defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>;
+defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>;
+defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>;
+defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>;
+defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
// Vector Widening Floating-Point Fused Multiply-Add Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VFWMACC_V : VALUr_FV_V_F<"vfwmacc", 0b111100>;
-defm VFWNMACC_V : VALUr_FV_V_F<"vfwnmacc", 0b111101>;
-defm VFWMSAC_V : VALUr_FV_V_F<"vfwmsac", 0b111110>;
-defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
+defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
+defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
+defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;
+defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Floating-Point Square-Root Instruction
-defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
-defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
-defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
+defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
+defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
+defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
// Vector Floating-Point MIN/MAX Instructions
-defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
-defm VFMAX_V : VALU_FV_V_F<"vfmax", 0b000110>;
+defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>;
+defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>;
// Vector Floating-Point Sign-Injection Instructions
-defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>;
-defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>;
-defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>;
+defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>;
+defm VFSGNJN_V : VSGNJ_FV_V_F<"vfsgnjn", 0b001001>;
+defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>;
def : InstAlias<"vfneg.v $vd, $vs$vm",
(VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>;
@@ -824,12 +1233,12 @@ def : InstAlias<"vfabs.v $vd, $vs$vm",
// Vector Floating-Point Compare Instructions
let RVVConstraint = NoConstraint in {
-defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>;
-defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>;
-defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>;
-defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>;
-defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>;
-defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>;
+defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>;
+defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>;
+defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>;
+defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>;
+defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>;
+defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>;
} // RVVConstraint = NoConstraint
def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm",
@@ -838,68 +1247,70 @@ def : InstAlias<"vmfge.vv $vd, $va, $vb$vm",
(VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
// Vector Floating-Point Classify Instruction
-defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>;
+defm VFCLASS_V : VCLS_FV_VS2<"vfclass.v", 0b010011, 0b10000>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+
// Vector Floating-Point Merge Instruction
+let vm = 0 in
def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins VR:$vs2, FPR32:$rs1, VMV0:$v0),
- "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> {
- let vm = 0;
-}
+ "vfmerge.vfm", "$vd, $vs2, $rs1, v0">,
+ Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
// Vector Floating-Point Move Instruction
let RVVConstraint = NoConstraint in
+let vm = 1, vs2 = 0 in
def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
- (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> {
- let vs2 = 0;
- let vm = 1;
-}
+ (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">,
+ Sched<[WriteVFMovV, ReadVFMovF]>;
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Single-Width Floating-Point/Integer Type-Convert Instructions
-defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
-defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
-defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
-defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
-defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
-defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
+defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
+defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
+defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
+defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
+defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
+defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
// Widening Floating-Point/Integer Type-Convert Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in {
-defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
-defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
-defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>;
-defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
-defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
-defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
-defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
+defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
+defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
+defm VFWCVT_RTZ_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>;
+defm VFWCVT_RTZ_X_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
+defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
+defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
+defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt
// Narrowing Floating-Point/Integer Type-Convert Instructions
let Constraints = "@earlyclobber $vd" in {
-defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>;
-defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>;
-defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>;
-defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>;
-defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>;
-defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
-defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
-defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
+defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>;
+defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>;
+defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>;
+defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>;
+defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>;
+defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
+defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
+defm VFNCVT_ROD_F_F_W : VNCVTF_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
} // Constraints = "@earlyclobber $vd"
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
+
// Vector Single-Width Integer Reduction Instructions
let RVVConstraint = NoConstraint in {
-defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>;
-defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>;
-defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>;
-defm VREDMINU : VALU_MV_V<"vredminu", 0b000100>;
-defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>;
-defm VREDAND : VALU_MV_V<"vredand", 0b000001>;
-defm VREDOR : VALU_MV_V<"vredor", 0b000010>;
-defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>;
+defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>;
+defm VREDMAXU : VRED_MV_V<"vredmaxu", 0b000110>;
+defm VREDMAX : VRED_MV_V<"vredmax", 0b000111>;
+defm VREDMINU : VRED_MV_V<"vredminu", 0b000100>;
+defm VREDMIN : VRED_MV_V<"vredmin", 0b000101>;
+defm VREDAND : VRED_MV_V<"vredand", 0b000001>;
+defm VREDOR : VRED_MV_V<"vredor", 0b000010>;
+defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>;
} // RVVConstraint = NoConstraint
// Vector Widening Integer Reduction Instructions
@@ -908,18 +1319,19 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
-defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>;
-defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>;
+defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>;
+defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
// Vector Single-Width Floating-Point Reduction Instructions
let RVVConstraint = NoConstraint in {
-defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>;
-defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>;
-defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>;
-defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>;
+defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>;
+defm VFREDSUM : VRED_FV_V<"vfredsum", 0b000001>;
+defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>;
+defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>;
} // RVVConstraint = NoConstraint
// Vector Widening Floating-Point Reduction Instructions
@@ -928,22 +1340,22 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
-defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>;
-defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>;
+defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>;
+defm VFWREDSUM : VWRED_FV_V<"vfwredsum", 0b110001>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Mask-Register Logical Instructions
let RVVConstraint = NoConstraint in {
-defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">;
-defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">;
-defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">;
-defm VMXOR_M : VALU_MV_Mask<"vmxor", 0b011011, "m">;
-defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">;
-defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">;
-defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">;
-defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">;
+defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">;
+defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">;
+defm VMANDNOT_M : VMALU_MV_Mask<"vmandnot", 0b011000, "m">;
+defm VMXOR_M : VMALU_MV_Mask<"vmxor", 0b011011, "m">;
+defm VMOR_M : VMALU_MV_Mask<"vmor", 0b011010, "m">;
+defm VMNOR_M : VMALU_MV_Mask<"vmnor", 0b011110, "m">;
+defm VMORNOT_M : VMALU_MV_Mask<"vmornot", 0b011100, "m">;
+defm VMXNOR_M : VMALU_MV_Mask<"vmxnor", 0b011111, "m">;
}
def : InstAlias<"vmmv.m $vd, $vs",
@@ -957,98 +1369,113 @@ def : InstAlias<"vmnot.m $vd, $vs",
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
+
// Vector mask population count vpopc
def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2, VMaskOp:$vm),
- "vpopc.m", "$vd, $vs2$vm">;
+ (ins VR:$vs2, VMaskOp:$vm),
+ "vpopc.m", "$vd, $vs2$vm">,
+ Sched<[WriteVMPopV, ReadVMPopV, ReadVMask]>;
// vfirst find-first-set mask bit
def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2, VMaskOp:$vm),
- "vfirst.m", "$vd, $vs2$vm">;
+ (ins VR:$vs2, VMaskOp:$vm),
+ "vfirst.m", "$vd, $vs2$vm">,
+ Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMask]>;
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in {
+
// vmsbf.m set-before-first mask bit
-defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>;
+defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>;
// vmsif.m set-including-first mask bit
-defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>;
+defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>;
// vmsof.m set-only-first mask bit
-defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>;
+defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>;
// Vector Iota Instruction
-defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>;
+defm VIOTA_M : VMIOT_MV_V<"viota.m", 0b010100, 0b10000>;
+
} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota
// Vector Element Index Instruction
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+
+let vs2 = 0 in
def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd),
- (ins VMaskOp:$vm), "vid.v", "$vd$vm"> {
- let vs2 = 0;
-}
+ (ins VMaskOp:$vm), "vid.v", "$vd$vm">,
+ Sched<[WriteVMIdxV, ReadVMask]>;
// Integer Scalar Move Instructions
let vm = 1, RVVConstraint = NoConstraint in {
def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">;
+ (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">,
+ Sched<[WriteVIMovVX, ReadVIMovVX]>;
let Constraints = "$vd = $vd_wb" in
def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb),
- (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">;
-
+ (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">,
+ Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>;
}
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1,
RVVConstraint = NoConstraint in {
// Floating-Point Scalar Move Instructions
def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd),
- (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">;
+ (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">,
+ Sched<[WriteVFMovVF, ReadVFMovVF]>;
let Constraints = "$vd = $vd_wb" in
def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb),
- (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">;
+ (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">,
+ Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1
+
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Slide Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>;
-defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>;
+defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>;
+defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>;
-defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>;
+defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>;
+defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>;
+defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>;
+defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Register Gather Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
-defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>;
-def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">;
+defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>;
+def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
// Vector Compress Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in {
-defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>;
+defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
-foreach nf = [1, 2, 4, 8] in {
- def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd),
- (ins VR:$vs2), "vmv" # nf # "r.v",
- "$vd, $vs2"> {
- let Uses = [];
- let vm = 1;
- }
+foreach n = [1, 2, 4, 8] in {
+ def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd),
+ (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">,
+ VMVRSched<n> {
+ let Uses = [];
+ let vm = 1;
+}
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
} // Predicates = [HasStdExtV]
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index ed26a5026114..14f59152ed42 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -231,6 +231,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 314af180aca1..75ca6ca861be 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -219,6 +219,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index f31e4af46c1b..4971ca1d4e3e 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -230,3 +230,4 @@ def : ReadAdvance<ReadFSqrt16, 0>;
// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleB.td"
+include "RISCVScheduleV.td"
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
new file mode 100644
index 000000000000..43af1802d706
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -0,0 +1,820 @@
+//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// Define scheduler resources associated with def operands.
+
+// 7. Vector Loads and Stores
+// 7.4. Vector Unit-Stride Instructions
+def WriteVLDE8 : SchedWrite;
+def WriteVLDE16 : SchedWrite;
+def WriteVLDE32 : SchedWrite;
+def WriteVLDE64 : SchedWrite;
+def WriteVSTE8 : SchedWrite;
+def WriteVSTE16 : SchedWrite;
+def WriteVSTE32 : SchedWrite;
+def WriteVSTE64 : SchedWrite;
+// 7.4.1. Vector Unit-Strided Mask
+def WriteVLDM : SchedWrite;
+def WriteVSTM : SchedWrite;
+// 7.5. Vector Strided Instructions
+def WriteVLDS8 : SchedWrite;
+def WriteVLDS16 : SchedWrite;
+def WriteVLDS32 : SchedWrite;
+def WriteVLDS64 : SchedWrite;
+def WriteVSTS8 : SchedWrite;
+def WriteVSTS16 : SchedWrite;
+def WriteVSTS32 : SchedWrite;
+def WriteVSTS64 : SchedWrite;
+// 7.6. Vector Indexed Instructions
+def WriteVLDUX8 : SchedWrite;
+def WriteVLDUX16 : SchedWrite;
+def WriteVLDUX32 : SchedWrite;
+def WriteVLDUX64 : SchedWrite;
+def WriteVLDOX8 : SchedWrite;
+def WriteVLDOX16 : SchedWrite;
+def WriteVLDOX32 : SchedWrite;
+def WriteVLDOX64 : SchedWrite;
+def WriteVSTUX8 : SchedWrite;
+def WriteVSTUX16 : SchedWrite;
+def WriteVSTUX32 : SchedWrite;
+def WriteVSTUX64 : SchedWrite;
+def WriteVSTOX8 : SchedWrite;
+def WriteVSTOX16 : SchedWrite;
+def WriteVSTOX32 : SchedWrite;
+def WriteVSTOX64 : SchedWrite;
+// 7.7. Vector Unit-stride Fault-Only-First Loads
+def WriteVLDFF8 : SchedWrite;
+def WriteVLDFF16 : SchedWrite;
+def WriteVLDFF32 : SchedWrite;
+def WriteVLDFF64 : SchedWrite;
+// 7.9. Vector Whole Register Instructions
+def WriteVLD1R8 : SchedWrite;
+def WriteVLD1R16 : SchedWrite;
+def WriteVLD1R32 : SchedWrite;
+def WriteVLD1R64 : SchedWrite;
+def WriteVLD2R8 : SchedWrite;
+def WriteVLD2R16 : SchedWrite;
+def WriteVLD2R32 : SchedWrite;
+def WriteVLD2R64 : SchedWrite;
+def WriteVLD4R8 : SchedWrite;
+def WriteVLD4R16 : SchedWrite;
+def WriteVLD4R32 : SchedWrite;
+def WriteVLD4R64 : SchedWrite;
+def WriteVLD8R8 : SchedWrite;
+def WriteVLD8R16 : SchedWrite;
+def WriteVLD8R32 : SchedWrite;
+def WriteVLD8R64 : SchedWrite;
+def WriteVST1R : SchedWrite;
+def WriteVST2R : SchedWrite;
+def WriteVST4R : SchedWrite;
+def WriteVST8R : SchedWrite;
+
+// 11. Vector Integer Arithmetic Instructions
+// 11.1. Vector Single-Width Integer Add and Subtract
+// 11.5. Vector Bitwise Logical Instructions
+def WriteVIALUV : SchedWrite;
+def WriteVIALUX : SchedWrite;
+def WriteVIALUI : SchedWrite;
+// 11.2. Vector Widening Integer Add/Subtract
+def WriteVIWALUV : SchedWrite;
+def WriteVIWALUX : SchedWrite;
+def WriteVIWALUI : SchedWrite;
+// 11.3. Vector Integer Extension
+def WriteVExtV : SchedWrite;
+// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
+def WriteVICALUV : SchedWrite;
+def WriteVICALUX : SchedWrite;
+def WriteVICALUI : SchedWrite;
+// 11.6. Vector Single-Width Bit Shift Instructions
+def WriteVShiftV : SchedWrite;
+def WriteVShiftX : SchedWrite;
+def WriteVShiftI : SchedWrite;
+// 11.7. Vector Narrowing Integer Right Shift Instructions
+def WriteVNShiftV : SchedWrite;
+def WriteVNShiftX : SchedWrite;
+def WriteVNShiftI : SchedWrite;
+// 11.8. Vector Integer Comparison Instructions
+// 11.9. Vector Integer Min/Max Instructions
+def WriteVICmpV : SchedWrite;
+def WriteVICmpX : SchedWrite;
+def WriteVICmpI : SchedWrite;
+// 11.10. Vector Single-Width Integer Multiply Instructions
+def WriteVIMulV : SchedWrite;
+def WriteVIMulX : SchedWrite;
+// 11.11. Vector Integer Divide Instructions
+def WriteVIDivV : SchedWrite;
+def WriteVIDivX : SchedWrite;
+// 11.12. Vector Widening Integer Multiply Instructions
+def WriteVIWMulV : SchedWrite;
+def WriteVIWMulX : SchedWrite;
+// 11.13. Vector Single-Width Integer Multiply-Add Instructions
+def WriteVIMulAddV : SchedWrite;
+def WriteVIMulAddX : SchedWrite;
+// 11.14. Vector Widening Integer Multiply-Add Instructions
+def WriteVIWMulAddV : SchedWrite;
+def WriteVIWMulAddX : SchedWrite;
+// 11.15. Vector Integer Merge Instructions
+def WriteVIMergeV : SchedWrite;
+def WriteVIMergeX : SchedWrite;
+def WriteVIMergeI : SchedWrite;
+// 11.16. Vector Integer Move Instructions
+def WriteVIMovV : SchedWrite;
+def WriteVIMovX : SchedWrite;
+def WriteVIMovI : SchedWrite;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+// 12.1. Vector Single-Width Saturating Add and Subtract
+def WriteVSALUV : SchedWrite;
+def WriteVSALUX : SchedWrite;
+def WriteVSALUI : SchedWrite;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+def WriteVAALUV : SchedWrite;
+def WriteVAALUX : SchedWrite;
+// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+def WriteVSMulV : SchedWrite;
+def WriteVSMulX : SchedWrite;
+// 12.4. Vector Single-Width Scaling Shift Instructions
+def WriteVSShiftV : SchedWrite;
+def WriteVSShiftX : SchedWrite;
+def WriteVSShiftI : SchedWrite;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+def WriteVNClipV : SchedWrite;
+def WriteVNClipX : SchedWrite;
+def WriteVNClipI : SchedWrite;
+
+// 13. Vector Floating-Point Instructions
+// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+def WriteVFALUV : SchedWrite;
+def WriteVFALUF : SchedWrite;
+// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+def WriteVFWALUV : SchedWrite;
+def WriteVFWALUF : SchedWrite;
+// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+def WriteVFMulV : SchedWrite;
+def WriteVFMulF : SchedWrite;
+def WriteVFDivV : SchedWrite;
+def WriteVFDivF : SchedWrite;
+// 13.5. Vector Widening Floating-Point Multiply
+def WriteVFWMulV : SchedWrite;
+def WriteVFWMulF : SchedWrite;
+// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+def WriteVFMulAddV : SchedWrite;
+def WriteVFMulAddF : SchedWrite;
+// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+def WriteVFWMulAddV : SchedWrite;
+def WriteVFWMulAddF : SchedWrite;
+// 13.8. Vector Floating-Point Square-Root Instruction
+def WriteVFSqrtV : SchedWrite;
+// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+def WriteVFRecpV : SchedWrite;
+// 13.11. Vector Floating-Point MIN/MAX Instructions
+// 13.13. Vector Floating-Point Compare Instructions
+def WriteVFCmpV : SchedWrite;
+def WriteVFCmpF : SchedWrite;
+// 13.12. Vector Floating-Point Sign-Injection Instructions
+def WriteVFSgnjV : SchedWrite;
+def WriteVFSgnjF : SchedWrite;
+// 13.14. Vector Floating-Point Classify Instruction
+def WriteVFClassV : SchedWrite;
+// 13.15. Vector Floating-Point Merge Instruction
+def WriteVFMergeV : SchedWrite;
+// 13.16. Vector Floating-Point Move Instruction
+def WriteVFMovV : SchedWrite;
+// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+def WriteVFCvtIToFV : SchedWrite;
+def WriteVFCvtFToIV : SchedWrite;
+def WriteVFCvtFToFV : SchedWrite;
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+def WriteVFWCvtIToFV : SchedWrite;
+def WriteVFWCvtFToIV : SchedWrite;
+def WriteVFWCvtFToFV : SchedWrite;
+// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+def WriteVFNCvtIToFV : SchedWrite;
+def WriteVFNCvtFToIV : SchedWrite;
+def WriteVFNCvtFToFV : SchedWrite;
+
+// 14. Vector Reduction Operations
+// 14.1. Vector Single-Width Integer Reduction Instructions
+def WriteVIRedV : SchedWrite;
+// 14.2. Vector Widening Integer Reduction Instructions
+def WriteVIWRedV : SchedWrite;
+// 14.3. Vector Single-Width Floating-Point Reduction Instructions
+def WriteVFRedV : SchedWrite;
+def WriteVFRedOV : SchedWrite;
+// 14.4. Vector Widening Floating-Point Reduction Instructions
+def WriteVFWRedV : SchedWrite;
+def WriteVFWRedOV : SchedWrite;
+
+// 15. Vector Mask Instructions
+// 15.1. Vector Mask-Register Logical Instructions
+def WriteVMALUV : SchedWrite;
+// 15.2. Vector Mask Population Count
+def WriteVMPopV : SchedWrite;
+// 15.3. Vector Find-First-Set Mask Bit
+def WriteVMFFSV : SchedWrite;
+// 15.4. Vector Set-Before-First Mask Bit
+// 15.5. Vector Set-Including-First Mask Bit
+// 15.6. Vector Set-only-First Mask Bit
+def WriteVMSFSV : SchedWrite;
+// 15.8. Vector Iota Instruction
+def WriteVMIotV : SchedWrite;
+// 15.9. Vector Element Index Instruction
+def WriteVMIdxV : SchedWrite;
+
+// 16. Vector Permutation Instructions
+// 16.1. Integer Scalar Move Instructions
+def WriteVIMovVX : SchedWrite;
+def WriteVIMovXV : SchedWrite;
+// 16.2. Floating-Point Scalar Move Instructions
+def WriteVFMovVF : SchedWrite;
+def WriteVFMovFV : SchedWrite;
+// 16.3. Vector Slide Instructions
+def WriteVISlideX : SchedWrite;
+def WriteVISlideI : SchedWrite;
+def WriteVISlide1X : SchedWrite;
+def WriteVFSlide1F : SchedWrite;
+// 16.4. Vector Register Gather Instructions
+def WriteVGatherV : SchedWrite;
+def WriteVGatherX : SchedWrite;
+def WriteVGatherI : SchedWrite;
+// 16.5. Vector Compress Instruction
+def WriteVCompressV : SchedWrite;
+// 16.6. Whole Vector Register Move
+def WriteVMov1V : SchedWrite;
+def WriteVMov2V : SchedWrite;
+def WriteVMov4V : SchedWrite;
+def WriteVMov8V : SchedWrite;
+
+//===----------------------------------------------------------------------===//
+/// Define scheduler resources associated with use operands.
+
+// 7. Vector Loads and Stores
+def ReadVLDX : SchedRead;
+def ReadVSTX : SchedRead;
+// 7.4. Vector Unit-Stride Instructions
+def ReadVSTE8V : SchedRead;
+def ReadVSTE16V : SchedRead;
+def ReadVSTE32V : SchedRead;
+def ReadVSTE64V : SchedRead;
+// 7.4.1. Vector Unit-Strided Mask
+def ReadVSTM : SchedRead;
+// 7.5. Vector Strided Instructions
+def ReadVLDSX : SchedRead;
+def ReadVSTSX : SchedRead;
+def ReadVSTS8V : SchedRead;
+def ReadVSTS16V : SchedRead;
+def ReadVSTS32V : SchedRead;
+def ReadVSTS64V : SchedRead;
+// 7.6. Vector Indexed Instructions
+def ReadVLDUXV : SchedRead;
+def ReadVLDOXV : SchedRead;
+def ReadVSTUX8 : SchedRead;
+def ReadVSTUX16 : SchedRead;
+def ReadVSTUX32 : SchedRead;
+def ReadVSTUX64 : SchedRead;
+def ReadVSTUXV : SchedRead;
+def ReadVSTUX8V : SchedRead;
+def ReadVSTUX16V : SchedRead;
+def ReadVSTUX32V : SchedRead;
+def ReadVSTUX64V : SchedRead;
+def ReadVSTOX8 : SchedRead;
+def ReadVSTOX16 : SchedRead;
+def ReadVSTOX32 : SchedRead;
+def ReadVSTOX64 : SchedRead;
+def ReadVSTOXV : SchedRead;
+def ReadVSTOX8V : SchedRead;
+def ReadVSTOX16V : SchedRead;
+def ReadVSTOX32V : SchedRead;
+def ReadVSTOX64V : SchedRead;
+// 7.9. Vector Whole Register Instructions
+def ReadVST1R : SchedRead;
+def ReadVST2R : SchedRead;
+def ReadVST4R : SchedRead;
+def ReadVST8R : SchedRead;
+
+// 11. Vector Integer Arithmetic Instructions
+// 11.1. Vector Single-Width Integer Add and Subtract
+// 11.5. Vector Bitwise Logical Instructions
+def ReadVIALUV : SchedRead;
+def ReadVIALUX : SchedRead;
+// 11.2. Vector Widening Integer Add/Subtract
+def ReadVIWALUV : SchedRead;
+def ReadVIWALUX : SchedRead;
+// 11.3. Vector Integer Extension
+def ReadVExtV : SchedRead;
+// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
+def ReadVIALUCV : SchedRead;
+def ReadVIALUCX : SchedRead;
+// 11.6. Vector Single-Width Bit Shift Instructions
+def ReadVShiftV : SchedRead;
+def ReadVShiftX : SchedRead;
+// 11.7. Vector Narrowing Integer Right Shift Instructions
+def ReadVNShiftV : SchedRead;
+def ReadVNShiftX : SchedRead;
+// 11.8. Vector Integer Comparison Instructions
+// 11.9. Vector Integer Min/Max Instructions
+def ReadVICmpV : SchedRead;
+def ReadVICmpX : SchedRead;
+// 11.10. Vector Single-Width Integer Multiply Instructions
+def ReadVIMulV : SchedRead;
+def ReadVIMulX : SchedRead;
+// 11.11. Vector Integer Divide Instructions
+def ReadVIDivV : SchedRead;
+def ReadVIDivX : SchedRead;
+// 11.12. Vector Widening Integer Multiply Instructions
+def ReadVIWMulV : SchedRead;
+def ReadVIWMulX : SchedRead;
+// 11.13. Vector Single-Width Integer Multiply-Add Instructions
+def ReadVIMulAddV : SchedRead;
+def ReadVIMulAddX : SchedRead;
+// 11.14. Vector Widening Integer Multiply-Add Instructions
+def ReadVIWMulAddV : SchedRead;
+def ReadVIWMulAddX : SchedRead;
+// 11.15. Vector Integer Merge Instructions
+def ReadVIMergeV : SchedRead;
+def ReadVIMergeX : SchedRead;
+// 11.16. Vector Integer Move Instructions
+def ReadVIMovV : SchedRead;
+def ReadVIMovX : SchedRead;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+// 12.1. Vector Single-Width Saturating Add and Subtract
+def ReadVSALUV : SchedRead;
+def ReadVSALUX : SchedRead;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+def ReadVAALUV : SchedRead;
+def ReadVAALUX : SchedRead;
+// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+def ReadVSMulV : SchedRead;
+def ReadVSMulX : SchedRead;
+// 12.4. Vector Single-Width Scaling Shift Instructions
+def ReadVSShiftV : SchedRead;
+def ReadVSShiftX : SchedRead;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+def ReadVNClipV : SchedRead;
+def ReadVNClipX : SchedRead;
+
+// 13. Vector Floating-Point Instructions
+// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+def ReadVFALUV : SchedRead;
+def ReadVFALUF : SchedRead;
+// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+def ReadVFWALUV : SchedRead;
+def ReadVFWALUF : SchedRead;
+// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+def ReadVFMulV : SchedRead;
+def ReadVFMulF : SchedRead;
+def ReadVFDivV : SchedRead;
+def ReadVFDivF : SchedRead;
+// 13.5. Vector Widening Floating-Point Multiply
+def ReadVFWMulV : SchedRead;
+def ReadVFWMulF : SchedRead;
+// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+def ReadVFMulAddV : SchedRead;
+def ReadVFMulAddF : SchedRead;
+// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+def ReadVFWMulAddV : SchedRead;
+def ReadVFWMulAddF : SchedRead;
+// 13.8. Vector Floating-Point Square-Root Instruction
+def ReadVFSqrtV : SchedRead;
+// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+def ReadVFRecpV : SchedRead;
+// 13.11. Vector Floating-Point MIN/MAX Instructions
+// 13.13. Vector Floating-Point Compare Instructions
+def ReadVFCmpV : SchedRead;
+def ReadVFCmpF : SchedRead;
+// 13.12. Vector Floating-Point Sign-Injection Instructions
+def ReadVFSgnjV : SchedRead;
+def ReadVFSgnjF : SchedRead;
+// 13.14. Vector Floating-Point Classify Instruction
+def ReadVFClassV : SchedRead;
+// 13.15. Vector Floating-Point Merge Instruction
+def ReadVFMergeV : SchedRead;
+def ReadVFMergeF : SchedRead;
+// 13.16. Vector Floating-Point Move Instruction
+def ReadVFMovF : SchedRead;
+// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+def ReadVFCvtIToFV : SchedRead;
+def ReadVFCvtFToIV : SchedRead;
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+def ReadVFWCvtIToFV : SchedRead;
+def ReadVFWCvtFToIV : SchedRead;
+def ReadVFWCvtFToFV : SchedRead;
+// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+def ReadVFNCvtIToFV : SchedRead;
+def ReadVFNCvtFToIV : SchedRead;
+def ReadVFNCvtFToFV : SchedRead;
+
+// 14. Vector Reduction Operations
+// 14.1. Vector Single-Width Integer Reduction Instructions
+def ReadVIRedV : SchedRead;
+def ReadVIRedV0 : SchedRead;
+// 14.2. Vector Widening Integer Reduction Instructions
+def ReadVIWRedV : SchedRead;
+def ReadVIWRedV0 : SchedRead;
+// 14.3. Vector Single-Width Floating-Point Reduction Instructions
+def ReadVFRedV : SchedRead;
+def ReadVFRedV0 : SchedRead;
+def ReadVFRedOV : SchedRead;
+def ReadVFRedOV0 : SchedRead;
+// 14.4. Vector Widening Floating-Point Reduction Instructions
+def ReadVFWRedV : SchedRead;
+def ReadVFWRedV0 : SchedRead;
+def ReadVFWRedOV : SchedRead;
+def ReadVFWRedOV0 : SchedRead;
+
+// 15. Vector Mask Instructions
+// 15.1. Vector Mask-Register Logical Instructions
+def ReadVMALUV : SchedRead;
+// 15.2. Vector Mask Population Count
+def ReadVMPopV : SchedRead;
+// 15.3. Vector Find-First-Set Mask Bit
+def ReadVMFFSV : SchedRead;
+// 15.4. Vector Set-Before-First Mask Bit
+// 15.5. Vector Set-Including-First Mask Bit
+// 15.6. Vector Set-only-First Mask Bit
+def ReadVMSFSV : SchedRead;
+// 15.8. Vector Iota Instruction
+def ReadVMIotV : SchedRead;
+
+// 16. Vector Permutation Instructions
+// 16.1. Integer Scalar Move Instructions
+def ReadVIMovVX : SchedRead;
+def ReadVIMovXV : SchedRead;
+def ReadVIMovXX : SchedRead;
+// 16.2. Floating-Point Scalar Move Instructions
+def ReadVFMovVF : SchedRead;
+def ReadVFMovFV : SchedRead;
+def ReadVFMovFX : SchedRead;
+// 16.3. Vector Slide Instructions
+def ReadVISlideV : SchedRead;
+def ReadVISlideX : SchedRead;
+def ReadVFSlideV : SchedRead;
+def ReadVFSlideF : SchedRead;
+// 16.4. Vector Register Gather Instructions
+def ReadVGatherV : SchedRead;
+def ReadVGatherX : SchedRead;
+// 16.5. Vector Compress Instruction
+def ReadVCompressV : SchedRead;
+// 16.6. Whole Vector Register Move
+def ReadVMov1V : SchedRead;
+def ReadVMov2V : SchedRead;
+def ReadVMov4V : SchedRead;
+def ReadVMov8V : SchedRead;
+
+// Others
+def ReadVMask : SchedRead;
+
+//===----------------------------------------------------------------------===//
+/// Define default scheduler resources for V.
+
+multiclass UnsupportedSchedV {
+let Unsupported = true in {
+
+// 7. Vector Loads and Stores
+def : WriteRes<WriteVLDE8, []>;
+def : WriteRes<WriteVLDE16, []>;
+def : WriteRes<WriteVLDE32, []>;
+def : WriteRes<WriteVLDE64, []>;
+def : WriteRes<WriteVSTE8, []>;
+def : WriteRes<WriteVSTE16, []>;
+def : WriteRes<WriteVSTE32, []>;
+def : WriteRes<WriteVSTE64, []>;
+def : WriteRes<WriteVLDM, []>;
+def : WriteRes<WriteVSTM, []>;
+def : WriteRes<WriteVLDS8, []>;
+def : WriteRes<WriteVLDS16, []>;
+def : WriteRes<WriteVLDS32, []>;
+def : WriteRes<WriteVLDS64, []>;
+def : WriteRes<WriteVSTS8, []>;
+def : WriteRes<WriteVSTS16, []>;
+def : WriteRes<WriteVSTS32, []>;
+def : WriteRes<WriteVSTS64, []>;
+def : WriteRes<WriteVLDUX8, []>;
+def : WriteRes<WriteVLDUX16, []>;
+def : WriteRes<WriteVLDUX32, []>;
+def : WriteRes<WriteVLDUX64, []>;
+def : WriteRes<WriteVLDOX8, []>;
+def : WriteRes<WriteVLDOX16, []>;
+def : WriteRes<WriteVLDOX32, []>;
+def : WriteRes<WriteVLDOX64, []>;
+def : WriteRes<WriteVSTUX8, []>;
+def : WriteRes<WriteVSTUX16, []>;
+def : WriteRes<WriteVSTUX32, []>;
+def : WriteRes<WriteVSTUX64, []>;
+def : WriteRes<WriteVSTOX8, []>;
+def : WriteRes<WriteVSTOX16, []>;
+def : WriteRes<WriteVSTOX32, []>;
+def : WriteRes<WriteVSTOX64, []>;
+def : WriteRes<WriteVLDFF8, []>;
+def : WriteRes<WriteVLDFF16, []>;
+def : WriteRes<WriteVLDFF32, []>;
+def : WriteRes<WriteVLDFF64, []>;
+def : WriteRes<WriteVLD1R8, []>;
+def : WriteRes<WriteVLD1R16, []>;
+def : WriteRes<WriteVLD1R32, []>;
+def : WriteRes<WriteVLD1R64, []>;
+def : WriteRes<WriteVLD2R8, []>;
+def : WriteRes<WriteVLD2R16, []>;
+def : WriteRes<WriteVLD2R32, []>;
+def : WriteRes<WriteVLD2R64, []>;
+def : WriteRes<WriteVLD4R8, []>;
+def : WriteRes<WriteVLD4R16, []>;
+def : WriteRes<WriteVLD4R32, []>;
+def : WriteRes<WriteVLD4R64, []>;
+def : WriteRes<WriteVLD8R8, []>;
+def : WriteRes<WriteVLD8R16, []>;
+def : WriteRes<WriteVLD8R32, []>;
+def : WriteRes<WriteVLD8R64, []>;
+def : WriteRes<WriteVST1R, []>;
+def : WriteRes<WriteVST2R, []>;
+def : WriteRes<WriteVST4R, []>;
+def : WriteRes<WriteVST8R, []>;
+
+// 11. Vector Integer Arithmetic Instructions
+def : WriteRes<WriteVIALUV, []>;
+def : WriteRes<WriteVIALUX, []>;
+def : WriteRes<WriteVIALUI, []>;
+def : WriteRes<WriteVIWALUV, []>;
+def : WriteRes<WriteVIWALUX, []>;
+def : WriteRes<WriteVIWALUI, []>;
+def : WriteRes<WriteVExtV, []>;
+def : WriteRes<WriteVICALUV, []>;
+def : WriteRes<WriteVICALUX, []>;
+def : WriteRes<WriteVICALUI, []>;
+def : WriteRes<WriteVShiftV, []>;
+def : WriteRes<WriteVShiftX, []>;
+def : WriteRes<WriteVShiftI, []>;
+def : WriteRes<WriteVNShiftV, []>;
+def : WriteRes<WriteVNShiftX, []>;
+def : WriteRes<WriteVNShiftI, []>;
+def : WriteRes<WriteVICmpV, []>;
+def : WriteRes<WriteVICmpX, []>;
+def : WriteRes<WriteVICmpI, []>;
+def : WriteRes<WriteVIMulV, []>;
+def : WriteRes<WriteVIMulX, []>;
+def : WriteRes<WriteVIDivV, []>;
+def : WriteRes<WriteVIDivX, []>;
+def : WriteRes<WriteVIWMulV, []>;
+def : WriteRes<WriteVIWMulX, []>;
+def : WriteRes<WriteVIMulAddV, []>;
+def : WriteRes<WriteVIMulAddX, []>;
+def : WriteRes<WriteVIWMulAddV, []>;
+def : WriteRes<WriteVIWMulAddX, []>;
+def : WriteRes<WriteVIMergeV, []>;
+def : WriteRes<WriteVIMergeX, []>;
+def : WriteRes<WriteVIMergeI, []>;
+def : WriteRes<WriteVIMovV, []>;
+def : WriteRes<WriteVIMovX, []>;
+def : WriteRes<WriteVIMovI, []>;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+def : WriteRes<WriteVSALUV, []>;
+def : WriteRes<WriteVSALUX, []>;
+def : WriteRes<WriteVSALUI, []>;
+def : WriteRes<WriteVAALUV, []>;
+def : WriteRes<WriteVAALUX, []>;
+def : WriteRes<WriteVSMulV, []>;
+def : WriteRes<WriteVSMulX, []>;
+def : WriteRes<WriteVSShiftV, []>;
+def : WriteRes<WriteVSShiftX, []>;
+def : WriteRes<WriteVSShiftI, []>;
+def : WriteRes<WriteVNClipV, []>;
+def : WriteRes<WriteVNClipX, []>;
+def : WriteRes<WriteVNClipI, []>;
+
+// 13. Vector Floating-Point Instructions
+def : WriteRes<WriteVFALUV, []>;
+def : WriteRes<WriteVFALUF, []>;
+def : WriteRes<WriteVFWALUV, []>;
+def : WriteRes<WriteVFWALUF, []>;
+def : WriteRes<WriteVFMulV, []>;
+def : WriteRes<WriteVFMulF, []>;
+def : WriteRes<WriteVFDivV, []>;
+def : WriteRes<WriteVFDivF, []>;
+def : WriteRes<WriteVFWMulV, []>;
+def : WriteRes<WriteVFWMulF, []>;
+def : WriteRes<WriteVFMulAddV, []>;
+def : WriteRes<WriteVFMulAddF, []>;
+def : WriteRes<WriteVFWMulAddV, []>;
+def : WriteRes<WriteVFWMulAddF, []>;
+def : WriteRes<WriteVFSqrtV, []>;
+def : WriteRes<WriteVFRecpV, []>;
+def : WriteRes<WriteVFCmpV, []>;
+def : WriteRes<WriteVFCmpF, []>;
+def : WriteRes<WriteVFSgnjV, []>;
+def : WriteRes<WriteVFSgnjF, []>;
+def : WriteRes<WriteVFClassV, []>;
+def : WriteRes<WriteVFMergeV, []>;
+def : WriteRes<WriteVFMovV, []>;
+def : WriteRes<WriteVFCvtIToFV, []>;
+def : WriteRes<WriteVFCvtFToIV, []>;
+def : WriteRes<WriteVFCvtFToFV, []>;
+def : WriteRes<WriteVFWCvtIToFV, []>;
+def : WriteRes<WriteVFWCvtFToIV, []>;
+def : WriteRes<WriteVFWCvtFToFV, []>;
+def : WriteRes<WriteVFNCvtIToFV, []>;
+def : WriteRes<WriteVFNCvtFToIV, []>;
+def : WriteRes<WriteVFNCvtFToFV, []>;
+
+// 14. Vector Reduction Operations
+def : WriteRes<WriteVIRedV, []>;
+def : WriteRes<WriteVIWRedV, []>;
+def : WriteRes<WriteVFRedV, []>;
+def : WriteRes<WriteVFRedOV, []>;
+def : WriteRes<WriteVFWRedV, []>;
+def : WriteRes<WriteVFWRedOV, []>;
+
+// 15. Vector Mask Instructions
+def : WriteRes<WriteVMALUV, []>;
+def : WriteRes<WriteVMPopV, []>;
+def : WriteRes<WriteVMFFSV, []>;
+def : WriteRes<WriteVMSFSV, []>;
+def : WriteRes<WriteVMIotV, []>;
+def : WriteRes<WriteVMIdxV, []>;
+
+// 16. Vector Permutation Instructions
+def : WriteRes<WriteVIMovVX, []>;
+def : WriteRes<WriteVIMovXV, []>;
+def : WriteRes<WriteVFMovVF, []>;
+def : WriteRes<WriteVFMovFV, []>;
+def : WriteRes<WriteVISlideX, []>;
+def : WriteRes<WriteVISlideI, []>;
+def : WriteRes<WriteVISlide1X, []>;
+def : WriteRes<WriteVFSlide1F, []>;
+def : WriteRes<WriteVGatherV, []>;
+def : WriteRes<WriteVGatherX, []>;
+def : WriteRes<WriteVGatherI, []>;
+def : WriteRes<WriteVCompressV, []>;
+def : WriteRes<WriteVMov1V, []>;
+def : WriteRes<WriteVMov2V, []>;
+def : WriteRes<WriteVMov4V, []>;
+def : WriteRes<WriteVMov8V, []>;
+
+// 7. Vector Loads and Stores
+def : ReadAdvance<ReadVLDX, 0>;
+def : ReadAdvance<ReadVSTX, 0>;
+def : ReadAdvance<ReadVSTE8V, 0>;
+def : ReadAdvance<ReadVSTE16V, 0>;
+def : ReadAdvance<ReadVSTE32V, 0>;
+def : ReadAdvance<ReadVSTE64V, 0>;
+def : ReadAdvance<ReadVSTM, 0>;
+def : ReadAdvance<ReadVLDSX, 0>;
+def : ReadAdvance<ReadVSTSX, 0>;
+def : ReadAdvance<ReadVSTS8V, 0>;
+def : ReadAdvance<ReadVSTS16V, 0>;
+def : ReadAdvance<ReadVSTS32V, 0>;
+def : ReadAdvance<ReadVSTS64V, 0>;
+def : ReadAdvance<ReadVLDUXV, 0>;
+def : ReadAdvance<ReadVLDOXV, 0>;
+def : ReadAdvance<ReadVSTUXV, 0>;
+def : ReadAdvance<ReadVSTUX8, 0>;
+def : ReadAdvance<ReadVSTUX16, 0>;
+def : ReadAdvance<ReadVSTUX32, 0>;
+def : ReadAdvance<ReadVSTUX64, 0>;
+def : ReadAdvance<ReadVSTUX8V, 0>;
+def : ReadAdvance<ReadVSTUX16V, 0>;
+def : ReadAdvance<ReadVSTUX32V, 0>;
+def : ReadAdvance<ReadVSTUX64V, 0>;
+def : ReadAdvance<ReadVSTOX8, 0>;
+def : ReadAdvance<ReadVSTOX16, 0>;
+def : ReadAdvance<ReadVSTOX32, 0>;
+def : ReadAdvance<ReadVSTOX64, 0>;
+def : ReadAdvance<ReadVSTOXV, 0>;
+def : ReadAdvance<ReadVSTOX8V, 0>;
+def : ReadAdvance<ReadVSTOX16V, 0>;
+def : ReadAdvance<ReadVSTOX32V, 0>;
+def : ReadAdvance<ReadVSTOX64V, 0>;
+def : ReadAdvance<ReadVST1R, 0>;
+def : ReadAdvance<ReadVST2R, 0>;
+def : ReadAdvance<ReadVST4R, 0>;
+def : ReadAdvance<ReadVST8R, 0>;
+
+// 11. Vector Integer Arithmetic Instructions
+def : ReadAdvance<ReadVIALUV, 0>;
+def : ReadAdvance<ReadVIALUX, 0>;
+def : ReadAdvance<ReadVIWALUV, 0>;
+def : ReadAdvance<ReadVIWALUX, 0>;
+def : ReadAdvance<ReadVExtV, 0>;
+def : ReadAdvance<ReadVIALUCV, 0>;
+def : ReadAdvance<ReadVIALUCX, 0>;
+def : ReadAdvance<ReadVShiftV, 0>;
+def : ReadAdvance<ReadVShiftX, 0>;
+def : ReadAdvance<ReadVNShiftV, 0>;
+def : ReadAdvance<ReadVNShiftX, 0>;
+def : ReadAdvance<ReadVICmpV, 0>;
+def : ReadAdvance<ReadVICmpX, 0>;
+def : ReadAdvance<ReadVIMulV, 0>;
+def : ReadAdvance<ReadVIMulX, 0>;
+def : ReadAdvance<ReadVIDivV, 0>;
+def : ReadAdvance<ReadVIDivX, 0>;
+def : ReadAdvance<ReadVIWMulV, 0>;
+def : ReadAdvance<ReadVIWMulX, 0>;
+def : ReadAdvance<ReadVIMulAddV, 0>;
+def : ReadAdvance<ReadVIMulAddX, 0>;
+def : ReadAdvance<ReadVIWMulAddV, 0>;
+def : ReadAdvance<ReadVIWMulAddX, 0>;
+def : ReadAdvance<ReadVIMergeV, 0>;
+def : ReadAdvance<ReadVIMergeX, 0>;
+def : ReadAdvance<ReadVIMovV, 0>;
+def : ReadAdvance<ReadVIMovX, 0>;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+def : ReadAdvance<ReadVSALUV, 0>;
+def : ReadAdvance<ReadVSALUX, 0>;
+def : ReadAdvance<ReadVAALUV, 0>;
+def : ReadAdvance<ReadVAALUX, 0>;
+def : ReadAdvance<ReadVSMulV, 0>;
+def : ReadAdvance<ReadVSMulX, 0>;
+def : ReadAdvance<ReadVSShiftV, 0>;
+def : ReadAdvance<ReadVSShiftX, 0>;
+def : ReadAdvance<ReadVNClipV, 0>;
+def : ReadAdvance<ReadVNClipX, 0>;
+
+// 13. Vector Floating-Point Instructions
+def : ReadAdvance<ReadVFALUV, 0>;
+def : ReadAdvance<ReadVFALUF, 0>;
+def : ReadAdvance<ReadVFWALUV, 0>;
+def : ReadAdvance<ReadVFWALUF, 0>;
+def : ReadAdvance<ReadVFMulV, 0>;
+def : ReadAdvance<ReadVFMulF, 0>;
+def : ReadAdvance<ReadVFDivV, 0>;
+def : ReadAdvance<ReadVFDivF, 0>;
+def : ReadAdvance<ReadVFWMulV, 0>;
+def : ReadAdvance<ReadVFWMulF, 0>;
+def : ReadAdvance<ReadVFMulAddV, 0>;
+def : ReadAdvance<ReadVFMulAddF, 0>;
+def : ReadAdvance<ReadVFWMulAddV, 0>;
+def : ReadAdvance<ReadVFWMulAddF, 0>;
+def : ReadAdvance<ReadVFSqrtV, 0>;
+def : ReadAdvance<ReadVFRecpV, 0>;
+def : ReadAdvance<ReadVFCmpV, 0>;
+def : ReadAdvance<ReadVFCmpF, 0>;
+def : ReadAdvance<ReadVFSgnjV, 0>;
+def : ReadAdvance<ReadVFSgnjF, 0>;
+def : ReadAdvance<ReadVFClassV, 0>;
+def : ReadAdvance<ReadVFMergeV, 0>;
+def : ReadAdvance<ReadVFMergeF, 0>;
+def : ReadAdvance<ReadVFMovF, 0>;
+def : ReadAdvance<ReadVFCvtIToFV, 0>;
+def : ReadAdvance<ReadVFCvtFToIV, 0>;
+def : ReadAdvance<ReadVFWCvtIToFV, 0>;
+def : ReadAdvance<ReadVFWCvtFToIV, 0>;
+def : ReadAdvance<ReadVFWCvtFToFV, 0>;
+def : ReadAdvance<ReadVFNCvtIToFV, 0>;
+def : ReadAdvance<ReadVFNCvtFToIV, 0>;
+def : ReadAdvance<ReadVFNCvtFToFV, 0>;
+
+// 14. Vector Reduction Operations
+def : ReadAdvance<ReadVIRedV, 0>;
+def : ReadAdvance<ReadVIRedV0, 0>;
+def : ReadAdvance<ReadVIWRedV, 0>;
+def : ReadAdvance<ReadVIWRedV0, 0>;
+def : ReadAdvance<ReadVFRedV, 0>;
+def : ReadAdvance<ReadVFRedV0, 0>;
+def : ReadAdvance<ReadVFRedOV, 0>;
+def : ReadAdvance<ReadVFRedOV0, 0>;
+def : ReadAdvance<ReadVFWRedV, 0>;
+def : ReadAdvance<ReadVFWRedV0, 0>;
+def : ReadAdvance<ReadVFWRedOV, 0>;
+def : ReadAdvance<ReadVFWRedOV0, 0>;
+
+// 15. Vector Mask Instructions
+def : ReadAdvance<ReadVMALUV, 0>;
+def : ReadAdvance<ReadVMPopV, 0>;
+def : ReadAdvance<ReadVMFFSV, 0>;
+def : ReadAdvance<ReadVMSFSV, 0>;
+def : ReadAdvance<ReadVMIotV, 0>;
+
+// 16. Vector Permutation Instructions
+def : ReadAdvance<ReadVIMovVX, 0>;
+def : ReadAdvance<ReadVIMovXV, 0>;
+def : ReadAdvance<ReadVIMovXX, 0>;
+def : ReadAdvance<ReadVFMovVF, 0>;
+def : ReadAdvance<ReadVFMovFV, 0>;
+def : ReadAdvance<ReadVFMovFX, 0>;
+def : ReadAdvance<ReadVISlideV, 0>;
+def : ReadAdvance<ReadVISlideX, 0>;
+def : ReadAdvance<ReadVFSlideV, 0>;
+def : ReadAdvance<ReadVFSlideF, 0>;
+def : ReadAdvance<ReadVGatherV, 0>;
+def : ReadAdvance<ReadVGatherX, 0>;
+def : ReadAdvance<ReadVCompressV, 0>;
+def : ReadAdvance<ReadVMov1V, 0>;
+def : ReadAdvance<ReadVMov2V, 0>;
+def : ReadAdvance<ReadVMov4V, 0>;
+def : ReadAdvance<ReadVMov8V, 0>;
+
+// Others
+def : ReadAdvance<ReadVMask, 0>;
+
+} // Unsupported
+} // UnsupportedSchedV
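The WriteRes/ReadAdvance defaults above are inert placeholders, and every ReadAdvance is declared with an advance of 0. As a reminder of what a nonzero advance would buy a subtarget, here is a minimal C++ sketch of the latency arithmetic; the helper is hypothetical and only models the usual scheduling-model semantics, it is not LLVM's implementation:

#include <algorithm>
#include <cstdio>

// A ReadAdvance of N cycles means the consuming operand is read N cycles
// late, so the visible producer->consumer latency shrinks by N (never
// below zero in this simplified model).
static int effectiveLatency(int writeLatency, int readAdvance) {
  return std::max(0, writeLatency - readAdvance);
}

int main() {
  std::printf("%d\n", effectiveLatency(4, 0)); // ReadAdvance<..., 0>: 4 cycles
  std::printf("%d\n", effectiveLatency(4, 2)); // a forwarding core: 2 cycles
  return 0;
}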
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3a64b3460030..a69850896436 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6704,17 +6704,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ptr = MemIntr->getBasePtr();
+ // The source constant may be larger than the subvector broadcast, so
+ // ensure we extract only the correct subvector constants.
if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
Type *CstTy = Cst->getType();
unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
- if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0)
+ unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
+ if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
+ (SizeInBits % SubVecSizeInBits) != 0)
return false;
- unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits();
- unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits;
- unsigned NumSubVecs = SizeInBits / CstSizeInBits;
+ unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
+ unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
+ unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
APInt UndefSubElts(NumSubElts, 0);
SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs,
- APInt(SubEltSizeInBits, 0));
+ APInt(CstEltSizeInBits, 0));
for (unsigned i = 0; i != NumSubElts; ++i) {
if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
UndefSubElts, i))
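The fix is easiest to see with concrete widths. Suppose a 512-bit node is decoded from a 128-bit SUBV_BROADCAST_LOAD whose constant-pool entry is a 256-bit <8 x float>. A standalone sketch of the index arithmetic follows; the widths are illustrative assumptions, not taken from a real test case:

#include <cassert>
#include <cstdio>

int main() {
  unsigned SizeInBits = 512;        // width of the value being decoded
  unsigned CstSizeInBits = 256;     // width of the constant-pool vector
  unsigned SubVecSizeInBits = 128;  // width the broadcast actually loads
  unsigned CstEltSizeInBits = 32;   // element width of the <8 x float> pool entry

  // Old code divided by the full constant width: 512/256 = 2 copies of all
  // 8 pool elements, even though only the low 128 bits are broadcast.
  unsigned OldNumSubVecs = SizeInBits / CstSizeInBits; // 2 (wrong)

  // New code divides by the broadcast width and extracts only the elements
  // the memory operation actually reads.
  assert(CstSizeInBits % SubVecSizeInBits == 0 &&
         SizeInBits % SubVecSizeInBits == 0);
  unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits; // 4 elements
  unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;       // 4 copies
  std::printf("old=%u new=%ux%u\n", OldNumSubVecs, NumSubVecs, NumSubElts);
  return 0;
}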
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index e83e1e74ff52..ba00e7da81f9 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+// BinOpRM_ImplicitUse - Instructions like "adc reg, reg, [mem]".
+// There is an implicit register read at the end of the operand sequence.
+class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ // base, scale, index, offset, segment.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // implicit register read.
+ sched.ReadAfterFold]>;
+
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
@@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
- : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
+ : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
EFLAGS))]>;
@@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteALURMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold]>; // reg
// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteADCRMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold, // reg
+ WriteALU.ReadAfterFold]>; // EFLAGS
// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
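In these Sched<[...]> lists the pairing with operands is purely positional: the write class covers the def, and each subsequent SchedRead lines up with the next MachineOperand. A tiny C++ table spells out that correspondence for the BinOpMR_RMW_FF ("adc [mem], reg") case; the strings are descriptive labels for illustration, not generated output:

#include <cstdio>

// Positional operand -> scheduling-class pairing for "adc [mem], reg",
// mirroring the Sched<[...]> list in BinOpMR_RMW_FF above.
int main() {
  const char *Pairs[][2] = {
      {"(stored result)", "WriteADCRMW"},
      {"base",    "ReadDefault"},
      {"scale",   "ReadDefault"},
      {"index",   "ReadDefault"},
      {"offset",  "ReadDefault"},
      {"segment", "ReadDefault"},
      {"reg",     "WriteALU.ReadAfterFold"},
      {"EFLAGS",  "WriteALU.ReadAfterFold"},
  };
  for (auto &P : Pairs)
    std::printf("%-15s -> %s\n", P[0], P[1]);
  return 0;
}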
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 762317425026..91b16ec66ee3 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
@@ -250,10 +251,12 @@ Value *AA::getWithType(Value &V, Type &Ty) {
return Constant::getNullValue(&Ty);
if (C->getType()->isPointerTy() && Ty.isPointerTy())
return ConstantExpr::getPointerCast(C, &Ty);
- if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
- return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
- if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
- return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) {
+ if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
+ return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
+ return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ }
}
return nullptr;
}
@@ -1023,7 +1026,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
- if (!Visited.insert(U).second)
+ if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
continue;
LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
<< *U->getUser() << "\n");
@@ -1925,49 +1928,85 @@ void Attributor::createShallowWrapper(Function &F) {
NumFnShallowWrappersCreated++;
}
+bool Attributor::isInternalizable(Function &F) {
+ if (F.isDeclaration() || F.hasLocalLinkage() ||
+ GlobalValue::isInterposableLinkage(F.getLinkage()))
+ return false;
+ return true;
+}
+
Function *Attributor::internalizeFunction(Function &F, bool Force) {
if (!AllowDeepWrapper && !Force)
return nullptr;
- if (F.isDeclaration() || F.hasLocalLinkage() ||
- GlobalValue::isInterposableLinkage(F.getLinkage()))
+ if (!isInternalizable(F))
return nullptr;
- Module &M = *F.getParent();
- FunctionType *FnTy = F.getFunctionType();
-
- // create a copy of the current function
- Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
- F.getName() + ".internalized");
- ValueToValueMapTy VMap;
- auto *NewFArgIt = Copied->arg_begin();
- for (auto &Arg : F.args()) {
- auto ArgName = Arg.getName();
- NewFArgIt->setName(ArgName);
- VMap[&Arg] = &(*NewFArgIt++);
- }
- SmallVector<ReturnInst *, 8> Returns;
-
- // Copy the body of the original function to the new one
- CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly,
- Returns);
+ SmallPtrSet<Function *, 2> FnSet = {&F};
+ DenseMap<Function *, Function *> InternalizedFns;
+ internalizeFunctions(FnSet, InternalizedFns);
- // Set the linakage and visibility late as CloneFunctionInto has some implicit
- // requirements.
- Copied->setVisibility(GlobalValue::DefaultVisibility);
- Copied->setLinkage(GlobalValue::PrivateLinkage);
+ return InternalizedFns[&F];
+}
- // Copy metadata
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- F.getAllMetadata(MDs);
- for (auto MDIt : MDs)
- if (!Copied->hasMetadata())
- Copied->addMetadata(MDIt.first, *MDIt.second);
+bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+ DenseMap<Function *, Function *> &FnMap) {
+ for (Function *F : FnSet)
+ if (!Attributor::isInternalizable(*F))
+ return false;
- M.getFunctionList().insert(F.getIterator(), Copied);
- F.replaceAllUsesWith(Copied);
- Copied->setDSOLocal(true);
+ FnMap.clear();
+ // Generate the internalized version of each function.
+ for (Function *F : FnSet) {
+ Module &M = *F->getParent();
+ FunctionType *FnTy = F->getFunctionType();
+
+ // Create a copy of the current function
+ Function *Copied =
+ Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName() + ".internalized");
+ ValueToValueMapTy VMap;
+ auto *NewFArgIt = Copied->arg_begin();
+ for (auto &Arg : F->args()) {
+ auto ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ }
+ SmallVector<ReturnInst *, 8> Returns;
+
+ // Copy the body of the original function to the new one
+ CloneFunctionInto(Copied, F, VMap,
+ CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+ // Set the linkage and visibility late as CloneFunctionInto has some
+ // implicit requirements.
+ Copied->setVisibility(GlobalValue::DefaultVisibility);
+ Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+ // Copy metadata
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F->getAllMetadata(MDs);
+ for (auto MDIt : MDs)
+ if (!Copied->hasMetadata())
+ Copied->addMetadata(MDIt.first, *MDIt.second);
+
+ M.getFunctionList().insert(F->getIterator(), Copied);
+ Copied->setDSOLocal(true);
+ FnMap[F] = Copied;
+ }
+
+ // Replace all uses of the old function with the new internalized function
+ // unless the caller is a function that was just internalized.
+ for (Function *F : FnSet) {
+ auto &InternalizedFn = FnMap[F];
+ auto IsNotInternalized = [&](Use &U) -> bool {
+ if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+ return !FnMap.lookup(CB->getCaller());
+ return false;
+ };
+ F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
+ }
- return Copied;
+ return true;
}
bool Attributor::isValidFunctionSignatureRewrite(
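A short usage sketch of the new batch API, derived only from the signatures visible above; the driver function and its iteration strategy are assumptions:

// Internalize a set of functions at once so that calls between them are
// rewired to the private copies in a single pass. Sketch only; Attributor
// setup and error handling are elided.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/Attributor.h"
using namespace llvm;

static void internalizeAll(Module &M) {
  SmallPtrSet<Function *, 16> FnSet;
  for (Function &F : M)
    if (Attributor::isInternalizable(F)) // skips declarations, local linkage
      FnSet.insert(&F);

  DenseMap<Function *, Function *> FnMap; // original -> ".internalized" copy
  if (Attributor::internalizeFunctions(FnSet, FnMap))
    for (auto &KV : FnMap)
      (void)KV.second; // each value is the private clone of KV.first
}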
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 98ce286d5139..3529923a9082 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1149,19 +1149,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
return true;
};
+ /// Helper struct; it will support ranges eventually.
+ struct OffsetInfo {
+ int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;
+
+ bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
+ };
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
using namespace AA::PointerInfo;
State S = getState();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Value &AssociatedValue = getAssociatedValue();
- struct OffsetInfo {
- int64_t Offset = 0;
- };
const DataLayout &DL = A.getDataLayout();
DenseMap<Value *, OffsetInfo> OffsetInfoMap;
- OffsetInfoMap[&AssociatedValue] = {};
+ OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
bool &Follow) {
@@ -1219,8 +1223,48 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
Follow = true;
return true;
}
- if (isa<CastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr))
+ if (isa<CastInst>(Usr) || isa<SelectInst>(Usr))
return HandlePassthroughUser(Usr, PtrOI, Follow);
+
+ // For PHIs we need to take care of the recurrence explicitly as the value
+ // might change while we iterate through a loop. For now, we give up if
+ // the PHI is not invariant.
+ if (isa<PHINode>(Usr)) {
+ // Check if the PHI is invariant (so far).
+ OffsetInfo &UsrOI = OffsetInfoMap[Usr];
+ if (UsrOI == PtrOI)
+ return true;
+
+ // Check if the PHI operand has already an unknown offset as we can't
+ // improve on that anymore.
+ if (PtrOI.Offset == OffsetAndSize::Unknown) {
+ UsrOI = PtrOI;
+ Follow = true;
+ return true;
+ }
+
+ // Check if the PHI operand is not dependent on the PHI itself.
+ APInt Offset(DL.getIndexTypeSizeInBits(AssociatedValue.getType()), 0);
+ if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true)) {
+ if (Offset != PtrOI.Offset) {
+ LLVM_DEBUG(dbgs()
+ << "[AAPointerInfo] PHI operand pointer offset mismatch "
+ << *CurPtr << " in " << *Usr << "\n");
+ return false;
+ }
+ return HandlePassthroughUser(Usr, PtrOI, Follow);
+ }
+
+ // TODO: Approximate in case we know the direction of the recurrence.
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
+ << *CurPtr << " in " << *Usr << "\n");
+ UsrOI = PtrOI;
+ UsrOI.Offset = OffsetAndSize::Unknown;
+ Follow = true;
+ return true;
+ }
+
if (auto *LoadI = dyn_cast<LoadInst>(Usr))
return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
AccessKind::AK_READ, PtrOI.Offset, Changed,
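The PHI handling above distinguishes loop-invariant pointer PHIs, which are treated as passthrough, from genuine recurrences, whose offset becomes Unknown. In source terms the two shapes look roughly like this (illustrative C++ only, not an LLVM test):

// Invariant PHI: both incoming pointers resolve to base + 1, so the PHI is
// a passthrough with a single known offset.
int load_invariant(int *base, bool c) {
  int *p = c ? base + 1 : (base + 2) - 1; // same offset on every edge
  return *p;
}

// Recurrent PHI: p advances each iteration, so no single constant offset
// from base describes it; the analysis now records Unknown instead of
// looping or miscomputing.
int load_recurrent(int *base, int n) {
  int sum = 0;
  for (int *p = base; p != base + n; ++p) // p is a PHI of base and p + 1
    sum += *p;
  return sum;
}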
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b80349352719..d6b97915ede6 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -4176,28 +4176,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
ORE.emit([&]() {
OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
return ORA << "Could not internalize function. "
- << "Some optimizations may not be possible.";
+ << "Some optimizations may not be possible. [OMP140]";
});
};
// Create internal copies of each function if this is a kernel Module. This
// allows interprocedural passes to see every call edge.
- DenseSet<const Function *> InternalizedFuncs;
- if (isOpenMPDevice(M))
+ DenseMap<Function *, Function *> InternalizedMap;
+ if (isOpenMPDevice(M)) {
+ SmallPtrSet<Function *, 16> InternalizeFns;
for (Function &F : M)
if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
!DisableInternalization) {
- if (Attributor::internalizeFunction(F, /* Force */ true)) {
- InternalizedFuncs.insert(&F);
+ if (Attributor::isInternalizable(F)) {
+ InternalizeFns.insert(&F);
} else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
EmitRemark(F);
}
}
+ Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
+ }
+
// Look at every function in the Module unless it was internalized.
SmallVector<Function *, 16> SCC;
for (Function &F : M)
- if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
+ if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
SCC.push_back(&F);
if (SCC.empty())
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2b0ef0c5f2cc..c5e14ebf3ae3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5158,6 +5158,83 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
if (!isa<Constant>(Op1) && Op1Min == Op1Max)
return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
+ // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a
+ // min/max canonical compare with some other compare. That could lead to
+ // conflict with select canonicalization and infinite looping.
+ // FIXME: This constraint may go away if min/max intrinsics are canonical.
+ auto isMinMaxCmp = [&](Instruction &Cmp) {
+ if (!Cmp.hasOneUse())
+ return false;
+ Value *A, *B;
+ SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor;
+ if (!SelectPatternResult::isMinOrMax(SPF))
+ return false;
+ return match(Op0, m_MaxOrMin(m_Value(), m_Value())) ||
+ match(Op1, m_MaxOrMin(m_Value(), m_Value()));
+ };
+ if (!isMinMaxCmp(I)) {
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_ULT: {
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A <u C -> A == C-1 if min(A)+1 == C
+ if (*CmpC == Op0Min + 1)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC - 1));
+ // X <u C --> X == 0, if the number of zero bits in the bottom of X
+ // exceeds the log2 of C.
+ if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Constant::getNullValue(Op1->getType()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A >u C -> A == C+1 if max(A)-1 == C
+ if (*CmpC == Op0Max - 1)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC + 1));
+ // X >u C --> X != 0, if the number of zero bits in the bottom of X
+ // exceeds the log2 of C.
+ if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0,
+ Constant::getNullValue(Op1->getType()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC - 1));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC + 1));
+ }
+ break;
+ }
+ }
+ }
+
// Based on the range information we know about the LHS, see if we can
// simplify this comparison. For example, (x&4) < 8 is always true.
switch (Pred) {
@@ -5219,21 +5296,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- // A <u C -> A == C-1 if min(A)+1 == C
- if (*CmpC == Op0Min + 1)
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC - 1));
- // X <u C --> X == 0, if the number of zero bits in the bottom of X
- // exceeds the log2 of C.
- if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Constant::getNullValue(Op1->getType()));
- }
break;
}
case ICmpInst::ICMP_UGT: {
@@ -5241,21 +5303,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- // A >u C -> A == C+1 if max(a)-1 == C
- if (*CmpC == Op0Max - 1)
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC + 1));
- // X >u C --> X != 0, if the number of zero bits in the bottom of X
- // exceeds the log2 of C.
- if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
- return new ICmpInst(ICmpInst::ICMP_NE, Op0,
- Constant::getNullValue(Op1->getType()));
- }
break;
}
case ICmpInst::ICMP_SLT: {
@@ -5263,14 +5310,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC - 1));
- }
break;
}
case ICmpInst::ICMP_SGT: {
@@ -5278,14 +5317,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC + 1));
- }
break;
}
case ICmpInst::ICMP_SGE:
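The moved folds are unchanged; only the clamp guard is new. The trailing-zeros fold in particular is easy to sanity-check exhaustively: if X is known to have three trailing zero bits (a multiple of 8) and C = 6, then ceilLogBase2(6) = 3 <= 3, and X <u 6 really is equivalent to X == 0. A small self-contained check (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 6; // ceil(log2(6)) == 3
  // Every X with at least 3 known trailing zeros is a multiple of 8.
  for (uint32_t X = 0; X <= 256; X += 8) {
    bool lhs = X < C;   // the original compare, X <u C
    bool rhs = X == 0;  // the folded form
    assert(lhs == rhs); // X <u C  <=>  X == 0
  }
  return 0;
}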
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index a8474e27383d..80abc775299a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -261,8 +261,8 @@ private:
bool PointerReplacer::collectUsers(Instruction &I) {
for (auto U : I.users()) {
- Instruction *Inst = cast<Instruction>(&*U);
- if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
+ auto *Inst = cast<Instruction>(&*U);
+ if (auto *Load = dyn_cast<LoadInst>(Inst)) {
if (Load->isVolatile())
return false;
Worklist.insert(Load);
@@ -270,7 +270,9 @@ bool PointerReplacer::collectUsers(Instruction &I) {
Worklist.insert(Inst);
if (!collectUsers(*Inst))
return false;
- } else if (isa<MemTransferInst>(Inst)) {
+ } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
+ if (MI->isVolatile())
+ return false;
Worklist.insert(Inst);
} else if (Inst->isLifetimeStartOrEnd()) {
continue;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ce2b913dba61..5bbc3c87ca4f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3230,7 +3230,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *Mask;
if (match(TrueVal, m_Zero()) &&
match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
- m_CombineOr(m_Undef(), m_Zero())))) {
+ m_CombineOr(m_Undef(), m_Zero()))) &&
+ (CondVal->getType() == Mask->getType())) {
// We can remove the select by ensuring the load zeros all lanes the
// select would have. We determine this by proving there is no overlap
// between the load and select masks.
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b585818af595..404852f1dd4d 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1981,6 +1981,9 @@ class LSRInstance {
/// IV users that belong to profitable IVChains.
SmallPtrSet<Use*, MaxChains> IVIncSet;
+ /// Induction variables that were generated and inserted by the SCEV Expander.
+ SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
+
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2085,6 +2088,9 @@ public:
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
bool getChanged() const { return Changed; }
+ const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
+ return ScalarEvolutionIVs;
+ }
void print_factors_and_types(raw_ostream &OS) const;
void print_fixups(raw_ostream &OS) const;
@@ -5589,6 +5595,11 @@ void LSRInstance::ImplementSolution(
GenerateIVChain(Chain, Rewriter, DeadInsts);
Changed = true;
}
+
+ for (const WeakVH &IV : Rewriter.getInsertedIVs())
+ if (IV && cast<Instruction>(&*IV)->getParent())
+ ScalarEvolutionIVs.push_back(IV);
+
// Clean up after ourselves. This must be done before deleting any
// instructions.
Rewriter.clear();
@@ -5859,87 +5870,399 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemorySSAWrapperPass>();
}
-using EqualValues = SmallVector<std::tuple<WeakVH, int64_t>, 4>;
-using EqualValuesMap =
- DenseMap<DbgValueInst *, SmallVector<std::pair<unsigned, EqualValues>>>;
-using LocationMap =
- DenseMap<DbgValueInst *, std::pair<DIExpression *, Metadata *>>;
+struct SCEVDbgValueBuilder {
+ SCEVDbgValueBuilder() = default;
+ SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
+ Values = Base.Values;
+ Expr = Base.Expr;
+ }
+
+ /// The DIExpression as we translate the SCEV.
+ SmallVector<uint64_t, 6> Expr;
+ /// The location ops of the DIExpression.
+ SmallVector<llvm::ValueAsMetadata *, 2> Values;
+
+ void pushOperator(uint64_t Op) { Expr.push_back(Op); }
+ void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
+
+ /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
+ /// in the set of values referenced by the expression.
+ void pushValue(llvm::Value *V) {
+ Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
+ auto *It =
+ std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V));
+ unsigned ArgIndex = 0;
+ if (It != Values.end()) {
+ ArgIndex = std::distance(Values.begin(), It);
+ } else {
+ ArgIndex = Values.size();
+ Values.push_back(llvm::ValueAsMetadata::get(V));
+ }
+ Expr.push_back(ArgIndex);
+ }
+
+ void pushValue(const SCEVUnknown *U) {
+ llvm::Value *V = U->getValue();
+ pushValue(V);
+ }
+
+ bool pushConst(const SCEVConstant *C) {
+ if (C->getAPInt().getMinSignedBits() > 64)
+ return false;
+ Expr.push_back(llvm::dwarf::DW_OP_consts);
+ Expr.push_back(C->getAPInt().getSExtValue());
+ return true;
+ }
+
+ /// Several SCEV types are sequences of the same arithmetic operator applied
+ /// to constants and values that may be extended or truncated.
+ bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
+ uint64_t DwarfOp) {
+ assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
+ "Expected arithmetic SCEV type");
+ bool Success = true;
+ unsigned EmitOperator = 0;
+ for (auto &Op : CommExpr->operands()) {
+ Success &= pushSCEV(Op);
+
+ if (EmitOperator >= 1)
+ pushOperator(DwarfOp);
+ ++EmitOperator;
+ }
+ return Success;
+ }
+
+ // TODO: Identify and omit noop casts.
+ bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
+ const llvm::SCEV *Inner = C->getOperand(0);
+ const llvm::Type *Type = C->getType();
+ uint64_t ToWidth = Type->getIntegerBitWidth();
+ bool Success = pushSCEV(Inner);
+ uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
+ IsSigned ? llvm::dwarf::DW_ATE_signed
+ : llvm::dwarf::DW_ATE_unsigned};
+ for (const auto &Op : CastOps)
+ pushOperator(Op);
+ return Success;
+ }
+
+ // TODO: MinMax - although these haven't been encountered in the test suite.
+ bool pushSCEV(const llvm::SCEV *S) {
+ bool Success = true;
+ if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
+ Success &= pushConst(StartInt);
+
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (!U->getValue())
+ return false;
+ pushValue(U->getValue());
+
+ } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
+ Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
+
+ } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+ Success &= pushSCEV(UDiv->getLHS());
+ Success &= pushSCEV(UDiv->getRHS());
+ pushOperator(llvm::dwarf::DW_OP_div);
+
+ } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
+ // Assert if a new and unknown SCEVCastExpr type is encountered.
+ assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
+ isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
+ "Unexpected cast type in SCEV.");
+ Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
+
+ } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
+ Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
+
+ } else if (isa<SCEVAddRecExpr>(S)) {
+ // Nested SCEVAddRecExprs are generated by nested loops and are currently
+ // unsupported.
+ return false;
+
+ } else {
+ return false;
+ }
+ return Success;
+ }
+
+ void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) {
+ // Re-state assumption that this dbg.value is not variadic. Any remaining
+ // opcodes in its expression operate on a single value already on the
+ // expression stack. Prepend our operations, which will re-compute and
+ // place that value on the expression stack.
+ assert(!DI.hasArgList());
+ auto *NewExpr =
+ DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true);
+ DI.setExpression(NewExpr);
+
+ auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(Values);
+ DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef));
+ }
+
+ /// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the
+ /// location op index 0.
+ void setShortFinalExpression(llvm::DbgValueInst &DI,
+ const DIExpression *OldExpr) {
+ assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) &&
+ "Expected DW_OP_llvm_arg and 0.");
+ DI.replaceVariableLocationOp(
+ 0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0]));
+
+ // See setFinalExpression: prepend our opcodes on the start of any old
+ // expression opcodes.
+ assert(!DI.hasArgList());
+ llvm::SmallVector<uint64_t, 6> FinalExpr(Expr.begin() + 2, Expr.end());
+ auto *NewExpr =
+ DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
+ DI.setExpression(NewExpr);
+ }
+
+ /// Once the IV and variable SCEV translation is complete, write it to the
+ /// source DVI.
+ void applyExprToDbgValue(llvm::DbgValueInst &DI,
+ const DIExpression *OldExpr) {
+ assert(!Expr.empty() && "Unexpected empty expression.");
+ // Emit a simpler form if only a single location is referenced.
+ if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg &&
+ Expr[1] == 0) {
+ setShortFinalExpression(DI, OldExpr);
+ } else {
+ setFinalExpression(DI, OldExpr);
+ }
+ }
+
+ /// Return true if the combination of arithmetic operator and underlying
+ /// SCEV constant value is an identity function.
+ bool isIdentityFunction(uint64_t Op, const SCEV *S) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getAPInt().getMinSignedBits() > 64)
+ return false;
+ int64_t I = C->getAPInt().getSExtValue();
+ switch (Op) {
+ case llvm::dwarf::DW_OP_plus:
+ case llvm::dwarf::DW_OP_minus:
+ return I == 0;
+ case llvm::dwarf::DW_OP_mul:
+ case llvm::dwarf::DW_OP_div:
+ return I == 1;
+ }
+ }
+ return false;
+ }
+
+ /// Convert a SCEV of a value to a DIExpression that is pushed onto the
+ /// builder's expression stack. The stack should already contain an
+ /// expression for the iteration count, so that it can be multiplied by
+ /// the stride and added to the start.
+ /// Components of the expression are omitted if they are an identity function.
+ /// Chain (non-affine) SCEVs are not supported.
+ bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
+ assert(SAR.isAffine() && "Expected affine SCEV");
+ // TODO: Is this check needed?
+ if (isa<SCEVAddRecExpr>(SAR.getStart()))
+ return false;
+
+ const SCEV *Start = SAR.getStart();
+ const SCEV *Stride = SAR.getStepRecurrence(SE);
+
+ // Skip pushing arithmetic noops.
+ if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
+ if (!pushSCEV(Stride))
+ return false;
+ pushOperator(llvm::dwarf::DW_OP_mul);
+ }
+ if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
+ if (!pushSCEV(Start))
+ return false;
+ pushOperator(llvm::dwarf::DW_OP_plus);
+ }
+ return true;
+ }
+
+ /// Convert the SCEV of the LSR-inserted induction variable into a
+ /// DIExpression that computes the iteration count: the IV minus the start,
+ /// divided by the stride. The result is pushed onto the builder's
+ /// expression stack.
+ /// Components of the expression are omitted if they are an identity function.
+ bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
+ ScalarEvolution &SE) {
+ assert(SAR.isAffine() && "Expected affine SCEV");
+ if (isa<SCEVAddRecExpr>(SAR.getStart())) {
+ LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
+ << SAR << '\n');
+ return false;
+ }
+ const SCEV *Start = SAR.getStart();
+ const SCEV *Stride = SAR.getStepRecurrence(SE);
+
+ // Skip pushing arithmetic noops.
+ if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
+ if (!pushSCEV(Start))
+ return false;
+ pushOperator(llvm::dwarf::DW_OP_minus);
+ }
+ if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
+ if (!pushSCEV(Stride))
+ return false;
+ pushOperator(llvm::dwarf::DW_OP_div);
+ }
+ return true;
+ }
+};
+
+struct DVIRecoveryRec {
+ DbgValueInst *DVI;
+ DIExpression *Expr;
+ Metadata *LocationOp;
+ const llvm::SCEV *SCEV;
+};
+
+static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
+ const SCEVDbgValueBuilder &IterationCount,
+ ScalarEvolution &SE) {
+ // LSR may have added locations to a previously single location-op DVI;
+ // multi-location DVIs are currently not supported.
+ if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
+ return false;
+
+ // SCEVs for SSA values are most frequently of the form
+ // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
+ // This is because %a is a PHI node that is not the IV. However, these
+ // SCEVs have not been observed to result in debuginfo-lossy optimisations,
+ // so it's not expected this point will be reached.
+ if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
+ << *CachedDVI.SCEV << '\n');
+
+ const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
+ if (!Rec->isAffine())
+ return false;
+
+ // Initialise a new builder with the iteration count expression. In
+ // combination with the value's SCEV this enables recovery.
+ SCEVDbgValueBuilder RecoverValue(IterationCount);
+ if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
+ RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
+ LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
+ return true;
+}
+
+static bool
+DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
+ llvm::PHINode *LSRInductionVar,
+ SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
+ if (DVIToUpdate.empty())
+ return false;
-static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE,
- EqualValuesMap &DbgValueToEqualSet,
- LocationMap &DbgValueToLocation) {
+ const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
+ assert(SCEVInductionVar &&
+ "Anticipated a SCEV for the post-LSR induction variable");
+
+ bool Changed = false;
+ if (const SCEVAddRecExpr *IVAddRec =
+ dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
+ if (!IVAddRec->isAffine())
+ return false;
+
+ SCEVDbgValueBuilder IterCountExpr;
+ IterCountExpr.pushValue(LSRInductionVar);
+ if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
+ << '\n');
+
+ // Needn't salvage if the location op hasn't been undef'd by LSR.
+ for (auto &DVIRec : DVIToUpdate) {
+ if (!DVIRec.DVI->isUndef())
+ continue;
+
+ // Some DVIs that were single location-op when cached are now multi-op,
+ // due to LSR optimisations. However, multi-op salvaging is not yet
+ // supported by SCEV salvaging, so attempt a salvage by restoring the
+ // pre-LSR single-op expression.
+ if (DVIRec.DVI->hasArgList()) {
+ if (!DVIRec.DVI->getVariableLocationOp(0))
+ continue;
+ llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
+ DVIRec.DVI->setRawLocation(
+ llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
+ DVIRec.DVI->setExpression(DVIRec.Expr);
+ }
+
+ Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+ }
+ }
+ return Changed;
+}
+
+/// Identify and cache salvageable DVI locations and expressions along with the
+/// corresponding SCEV(s). Also ensure, via an asserting handle, that the DVI
+/// is not deleted before the salvaging attempt completes.
+static void
+DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
+ SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
+ SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
for (auto &B : L->getBlocks()) {
for (auto &I : *B) {
auto DVI = dyn_cast<DbgValueInst>(&I);
if (!DVI)
continue;
- for (unsigned Idx = 0; Idx < DVI->getNumVariableLocationOps(); ++Idx) {
- // TODO: We can duplicate results if the same arg appears more than
- // once.
- Value *V = DVI->getVariableLocationOp(Idx);
- if (!V || !SE.isSCEVable(V->getType()))
- continue;
- auto DbgValueSCEV = SE.getSCEV(V);
- EqualValues EqSet;
- for (PHINode &Phi : L->getHeader()->phis()) {
- if (V->getType() != Phi.getType())
- continue;
- if (!SE.isSCEVable(Phi.getType()))
- continue;
- auto PhiSCEV = SE.getSCEV(&Phi);
- Optional<APInt> Offset =
- SE.computeConstantDifference(DbgValueSCEV, PhiSCEV);
- if (Offset && Offset->getMinSignedBits() <= 64)
- EqSet.emplace_back(
- std::make_tuple(&Phi, Offset.getValue().getSExtValue()));
- }
- DbgValueToEqualSet[DVI].push_back({Idx, std::move(EqSet)});
- // If we fall back to using this raw location, at least one location op
- // must be dead. A DIArgList will automatically undef arguments when
- // they become unavailable, but a ValueAsMetadata will not; since we
- // know the value should be undef, we use the undef value directly here.
- Metadata *RawLocation =
- DVI->hasArgList() ? DVI->getRawLocation()
- : ValueAsMetadata::get(UndefValue::get(
- DVI->getVariableLocationOp(0)->getType()));
- DbgValueToLocation[DVI] = {DVI->getExpression(), RawLocation};
- }
+
+ if (DVI->hasArgList())
+ continue;
+
+ if (!DVI->getVariableLocationOp(0) ||
+ !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
+ continue;
+
+ SalvageableDVISCEVs.push_back(
+ {DVI, DVI->getExpression(), DVI->getRawLocation(),
+ SE.getSCEV(DVI->getVariableLocationOp(0))});
+ DVIHandles.insert(DVI);
}
}
}
-static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet,
- LocationMap &DbgValueToLocation) {
- for (auto A : DbgValueToEqualSet) {
- auto *DVI = A.first;
- // Only update those that are now undef.
- if (!DVI->isUndef())
+/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
+/// any PHI from the loop header is usable, but may have less chance of
+/// surviving subsequent transforms.
+static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
+ const LSRInstance &LSR) {
+ // For now, just pick the first IV generated and inserted. Ideally pick an IV
+ // that is unlikely to be optimised away by subsequent transforms.
+ for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
+ if (!IV)
continue;
- // The dbg.value may have had its value or expression changed during LSR by
- // a failed salvage attempt; refresh them from the map.
- auto *DbgDIExpr = DbgValueToLocation[DVI].first;
- DVI->setRawLocation(DbgValueToLocation[DVI].second);
- DVI->setExpression(DbgDIExpr);
- assert(DVI->isUndef() && "dbg.value with non-undef location should not "
- "have been modified by LSR.");
- for (auto IdxEV : A.second) {
- unsigned Idx = IdxEV.first;
- for (auto EV : IdxEV.second) {
- auto EVHandle = std::get<WeakVH>(EV);
- if (!EVHandle)
- continue;
- int64_t Offset = std::get<int64_t>(EV);
- DVI->replaceVariableLocationOp(Idx, EVHandle);
- if (Offset) {
- SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
- DbgDIExpr = DIExpression::appendOpsToArg(DbgDIExpr, Ops, Idx, true);
- }
- DVI->setExpression(DbgDIExpr);
- break;
- }
+
+ assert(isa<PHINode>(&*IV) && "Expected PHI node.");
+ if (SE.isSCEVable((*IV).getType())) {
+ PHINode *Phi = dyn_cast<PHINode>(&*IV);
+ LLVM_DEBUG(dbgs() << "scev-salvage: IV : " << *IV
+ << "with SCEV: " << *SE.getSCEV(Phi) << "\n");
+ return Phi;
}
}
+
+ for (PHINode &Phi : L.getHeader()->phis()) {
+ if (!SE.isSCEVable(Phi.getType()))
+ continue;
+
+ const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi);
+ if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV))
+ if (!Rec->isAffine())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi
+ << " with SCEV: " << *PhiSCEV << "\n");
+ return &Phi;
+ }
+ return nullptr;
}
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
@@ -5948,20 +6271,21 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo &TLI,
MemorySSA *MSSA) {
+ // Debug preservation - before we start removing anything, identify which
+ // DVIs meet the salvageable criteria and store their DIExpression and SCEVs.
+ SmallVector<DVIRecoveryRec, 2> SalvageableDVI;
+ SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
+ DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles);
+
bool Changed = false;
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
// Run the main LSR transformation.
- Changed |=
- LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();
-
- // Debug preservation - before we start removing anything create equivalence
- // sets for the llvm.dbg.value intrinsics.
- EqualValuesMap DbgValueToEqualSet;
- LocationMap DbgValueToLocation;
- DbgGatherEqualValues(L, SE, DbgValueToEqualSet, DbgValueToLocation);
+ const LSRInstance &Reducer =
+ LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
+ Changed |= Reducer.getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
@@ -5981,8 +6305,22 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
}
- DbgApplyEqualValues(DbgValueToEqualSet, DbgValueToLocation);
+ if (SalvageableDVI.empty())
+ return Changed;
+
+ // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
+ // expressions composed using the derived iteration count.
+ // TODO: Allow for multiple IV references for nested AddRecSCEVs
+ for (auto &L : LI) {
+ if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
+ DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI);
+ else {
+ LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
+ "could not be identified.\n");
+ }
+ }
+ DVIHandles.clear();
return Changed;
}
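Taken together, the salvage rewrites a dead debug value as an expression of the surviving induction variable. A sketch of the arithmetic with illustrative constants rather than real SCEVs: SCEVToIterCountExpr derives the iteration count as (IV - start) / stride, and SCEVToValueExpr then recomputes the lost value's affine recurrence from that count:

#include <cassert>
#include <cstdint>

// (IV - IVStart) / IVStride rebuilds the iteration count (DW_OP_minus,
// DW_OP_div); Count * Stride2 + Start2 then re-evaluates the salvaged
// value's recurrence (DW_OP_mul, DW_OP_plus).
int64_t recoverValue(int64_t IV, int64_t IVStart, int64_t IVStride,
                     int64_t Start2, int64_t Stride2) {
  int64_t Count = (IV - IVStart) / IVStride;
  return Count * Stride2 + Start2;
}

int main() {
  // Surviving IV: {0,+,1}; optimised-away value: {8,+,4} (e.g. an address).
  for (int64_t i = 0; i < 10; ++i)
    assert(recoverValue(/*IV=*/i, 0, 1, 8, 4) == 8 + i * 4);
  return 0;
}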
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 5ec01454e5b2..fe160d5415bd 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2811,10 +2811,11 @@ private:
if (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)
return false;
+ // Length must be in range for FixedVectorType.
auto *C = cast<ConstantInt>(II.getLength());
- if (C->getBitWidth() > 64)
+ const uint64_t Len = C->getLimitedValue();
+ if (Len > std::numeric_limits<unsigned>::max())
return false;
- const auto Len = C->getZExtValue();
auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
return canConvertValue(DL, SrcTy, AllocaTy) &&
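The old C->getBitWidth() > 64 test only rejected over-wide constants, so a 64-bit length could still be silently truncated to the unsigned element count that FixedVectorType::get takes. A minimal model of the tightened bound:

#include <cassert>
#include <cstdint>
#include <limits>

// getLimitedValue() clamps the APInt to uint64_t; the new comparison then
// rejects anything that would not survive conversion to the unsigned
// element count of FixedVectorType::get.
bool lengthFitsFixedVector(uint64_t Len) {
  return Len <= std::numeric_limits<unsigned>::max();
}

int main() {
  assert(lengthFitsFixedVector(16));
  // Fits in 64 bits (the old check passed) but would truncate to unsigned 0.
  assert(!lengthFitsFixedVector(uint64_t(1) << 32));
  return 0;
}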
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 91280762aaa7..bd2b6fafdf2e 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -23,6 +24,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -566,10 +568,18 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// to ensure we dominate all of our uses. Always insert right before the
// relevant instruction (terminator, assume), so that we insert in proper
// order in the case of multiple predicateinfo in the same block.
+ // The number of named values is used to detect if a new declaration was
+ // added. If so, that declaration is tracked so that it can be removed when
+ // the analysis is done. The corner case where a new declaration results in
+ // a name clash, renaming the old value, is not considered, as that would
+ // represent an invalid module.
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
+ auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
PI.PredicateMap.insert({PIC, ValInfo});
@@ -581,8 +591,11 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// Insert the predicate directly after the assume. While it also holds
// directly before it, assume(i1 true) is not a useful fact.
IRBuilder<> B(PAssume->AssumeInst->getNextNode());
+ auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
@@ -761,6 +774,23 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
Builder.buildPredicateInfo();
}
+// Remove all declarations we created. The PredicateInfo consumers are
+// responsible for removing the ssa_copy calls they created.
+PredicateInfo::~PredicateInfo() {
+ // Collect the function pointers in a set first, as SmallSet uses a
+ // SmallVector internally and we have to remove the asserting value handles
+ // first.
+ SmallPtrSet<Function *, 20> FunctionPtrs;
+ for (auto &F : CreatedDeclarations)
+ FunctionPtrs.insert(&*F);
+ CreatedDeclarations.clear();
+
+ for (Function *F : FunctionPtrs) {
+ assert(F->user_begin() == F->user_end() &&
+ "PredicateInfo consumer did not remove all SSA copies.");
+ F->eraseFromParent();
+ }
+}
+
Optional<PredicateConstraint> PredicateBase::getConstraint() const {
switch (Type) {
case PT_Assume:
@@ -827,6 +857,19 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
}
+// Replace ssa_copy calls created by PredicateInfo with their operand.
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
+ for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
+ const auto *PI = PredInfo.getPredicateInfoFor(&Inst);
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+
+ Inst.replaceAllUsesWith(II->getOperand(0));
+ Inst.eraseFromParent();
+ }
+}
+
bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -834,6 +877,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();
+
+ replaceCreatedSSACopys(*PredInfo, F);
return false;
}
@@ -845,6 +890,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(OS);
+ replaceCreatedSSACopys(*PredInfo, F);
return PreservedAnalyses::all();
}
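The named-value counting trick used in both hunks above deserves a closer look: Intrinsic::getDeclaration behaves like get-or-insert, so comparing the module's named-value count before and after the call reveals whether it created a fresh declaration that must be cleaned up later. A self-contained sketch with a toy Module standing in for LLVM's:

#include <cassert>
#include <cstddef>
#include <string>
#include <unordered_map>

// Toy stand-in for llvm::Module: getOrInsert creates the named value on
// first use, mirroring the get-or-insert shape of Intrinsic::getDeclaration.
struct Module {
  std::unordered_map<std::string, int> Named;
  std::size_t getNumNamedValues() const { return Named.size(); }
  int *getOrInsert(const std::string &Name) { return &Named[Name]; }
};

int main() {
  Module M;
  auto Before = M.getNumNamedValues();
  int *F = M.getOrInsert("llvm.ssa.copy.i32");
  // Count grew: the call created the declaration, so track it for cleanup.
  assert(F && M.getNumNamedValues() != Before);

  Before = M.getNumNamedValues();
  M.getOrInsert("llvm.ssa.copy.i32");
  // Count unchanged: declaration pre-existed, nothing to clean up later.
  assert(M.getNumNamedValues() == Before);
  return 0;
}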
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 5af1c37e6197..3978e1e29825 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1393,9 +1393,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops;
- // Remember this PHI, even in post-inc mode.
+ // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
+ // effective when we are able to use an IV inserted here, so record it.
InsertedValues.insert(PN);
-
+ InsertedIVs.push_back(PN);
return PN;
}
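The inserted PHIs are recorded as WeakVH so that a handle simply reads back as null if a later transform erases the PHI: that is the if (!IV) continue guard in GetInductionVariable above. As a rough analogy only (std::weak_ptr is not llvm::WeakVH, but the nulling-out behaviour is similar):

#include <cassert>
#include <memory>
#include <vector>

int main() {
  // The shared_ptr stands in for the inserted PHI node.
  auto PN = std::make_shared<int>(42);
  std::vector<std::weak_ptr<int>> InsertedIVs{PN};
  assert(!InsertedIVs[0].expired()); // IV alive: usable for salvaging
  PN.reset();                        // a later transform deletes the PHI
  assert(InsertedIVs[0].expired());  // handle nulls out; salvage skips it
  return 0;
}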
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f24ae6b100d5..671bc6b5212b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5433,6 +5433,21 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// lane 0 demanded or b) are uses which demand only lane 0 of their operand.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sideeffect:
+ case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ if (TheLoop->hasLoopInvariantOperands(&I))
+ addToWorklistIfAllowed(&I);
+ break;
+ default:
+ break;
+ }
+ }
+
// If there's no pointer operand, there's nothing to do.
auto *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
@@ -8916,6 +8931,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
+ // Even if the instruction is not marked as uniform, there are certain
+ // intrinsic calls that can be effectively treated as such, so we check for
+ // them here. Conservatively, we only do this for scalable vectors, since
+ // for fixed-width VFs we can always fall back on full scalarization.
+ if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) {
+ switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // For scalable vectors if one of the operands is variant then we still
+ // want to mark as uniform, which will generate one instruction for just
+ // the first lane of the vector. We can't scalarize the call in the same
+ // way as for fixed-width vectors because we don't know how many lanes
+ // there are.
+ //
+ // The reasons for doing it this way for scalable vectors are:
+ // 1. For the assume intrinsic generating the instruction for the first
+ // lane is still better than not generating any at all. For
+ // example, the input may be a splat across all lanes.
+ // 2. For the lifetime start/end intrinsics the pointer operand only
+ // does anything useful when the input comes from a stack object,
+ // which suggests it should always be uniform. For non-stack objects
+ // the effect is to poison the object, which still allows us to
+ // remove the call.
+ IsUniform = true;
+ break;
+ default:
+ break;
+ }
+ }
+
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
setRecipe(I, Recipe);
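A condensed model of the two uniformity checks added above, with an invented treatAsUniform helper: collectLoopUniforms admits these intrinsics when their operands are loop-invariant, while handleReplication additionally forces them uniform for scalable VFs, where per-lane scalarization is impossible because the lane count is unknown at compile time:

#include <cassert>

// Illustrative IDs only; the real code switches on Intrinsic::* values.
enum class IntrinsicID { Assume, LifetimeStart, LifetimeEnd, SideEffect, Other };

bool treatAsUniform(IntrinsicID ID, bool OperandsInvariant, bool ScalableVF) {
  switch (ID) {
  case IntrinsicID::Assume:
  case IntrinsicID::LifetimeStart:
  case IntrinsicID::LifetimeEnd:
    // Uniform if invariant, or unconditionally under scalable VFs, where a
    // single first-lane instruction replaces per-lane scalarization.
    return OperandsInvariant || ScalableVF;
  case IntrinsicID::SideEffect:
    return OperandsInvariant; // only the loop-invariant case applies
  default:
    return false;
  }
}

int main() {
  assert(treatAsUniform(IntrinsicID::Assume, false, true)); // scalable VF
  assert(treatAsUniform(IntrinsicID::LifetimeStart, true, false));
  assert(!treatAsUniform(IntrinsicID::Other, true, true));
  return 0;
}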
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 9a949761bb75..4ecc3015529c 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -145,10 +145,11 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
double AverageTime1, AverageTime2, AverageTime3;
AverageTime1 =
- (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions;
- AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions;
- AverageTime3 =
- (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions;
+ (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
+ AverageTime2 =
+ (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
+ AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
+ CumulativeExecutions;
OS << Executions;
OS.PadToColumn(13);
@@ -157,18 +158,18 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
BufferSize);
- OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
OS.PadToColumn(20);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
BufferSize);
- OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
OS.PadToColumn(27);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
CumulativeExecutions,
getSubTargetInfo().getSchedModel().MicroOpBufferSize);
- OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);
if (OS.has_colors())
OS.resetColor();
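The refactor folds the x10 scaling into the average itself, so each print site only applies the half-up rounding floor(x + 0.5) / 10 to an already-scaled value. A quick numeric check of that rounding step, with made-up cycle counts:

#include <cassert>
#include <cmath>
#include <cstdio>

// Round a value that is already scaled by 10 half-up to one decimal place,
// as the updated print sites do.
double roundScaledToOneDecimal(double ScaledByTen) {
  return std::floor(ScaledByTen + 0.5) / 10;
}

int main() {
  // 7 cycles over 4 executions: average 1.75, scaled 17.5, printed as 1.8.
  double AverageTime = (double)(7 * 10) / 4;
  assert(roundScaledToOneDecimal(AverageTime) == 1.8);
  std::printf("%.1f\n", roundScaledToOneDecimal(AverageTime));
  return 0;
}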
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 162fb38e1eed..dd3e7688d33f 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -344,6 +344,13 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
// link node as successor of all nodes in the prev_set if any
npredecessors +=
__kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
+ if (dep_barrier) {
+ // clean last_out and prev_set if any; don't touch last_set
+ __kmp_node_deref(thread, last_out);
+ info->last_out = NULL;
+ __kmp_depnode_list_free(thread, prev_set);
+ info->prev_set = NULL;
+ }
} else { // last_set is of different dep kind, make it prev_set
// link node as successor of all nodes in the last_set
npredecessors +=
@@ -353,13 +360,21 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
info->last_out = NULL;
// clean prev_set if any
__kmp_depnode_list_free(thread, prev_set);
- // move last_set to prev_set, new last_set will be allocated
- info->prev_set = last_set;
+ if (!dep_barrier) {
+ // move last_set to prev_set, new last_set will be allocated
+ info->prev_set = last_set;
+ } else {
+ info->prev_set = NULL;
+ info->last_flag = 0;
+ }
info->last_set = NULL;
}
- info->last_flag = dep->flag; // store dep kind of the last_set
- info->last_set = __kmp_add_node(thread, info->last_set, node);
-
+ // for dep_barrier the last_flag value should remain
+ // 0 if last_set is empty, and unchanged otherwise
+ if (!dep_barrier) {
+ info->last_flag = dep->flag; // store dep kind of the last_set
+ info->last_set = __kmp_add_node(thread, info->last_set, node);
+ }
// check if we are processing MTX dependency
if (dep->flag == KMP_DEP_MTX) {
if (info->mtx_lock == NULL) {
@@ -756,8 +771,6 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
kmp_depnode_t node = {0};
__kmp_init_node(&node);
- // the stack owns the node
- __kmp_node_ref(&node);
if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
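A heavily condensed model of the new dep_barrier paths: a wait-deps barrier observes the recorded dependences but must not register itself as a producer, so the bookkeeping is released or reset instead of being extended. Field names follow the hunk; everything else here is a toy stand-in for the kmp runtime types:

#include <cassert>

struct DepInfo {
  void *last_out = nullptr;
  void *prev_set = nullptr;
  void *last_set = nullptr;
  int last_flag = 0;
};

void onBarrier(DepInfo &info, bool SameDepKindAsLastSet) {
  info.last_out = nullptr;   // __kmp_node_deref + clear in the real runtime
  info.prev_set = nullptr;   // __kmp_depnode_list_free + clear
  if (!SameDepKindAsLastSet) {
    info.last_set = nullptr; // last_set was consumed and freed
    info.last_flag = 0;      // nothing is recorded for the barrier itself
  }
  // Unlike a real task, the barrier never calls __kmp_add_node here.
}

int main() {
  DepInfo info;
  info.last_out = &info;
  info.last_flag = 1;
  onBarrier(info, /*SameDepKindAsLastSet=*/false);
  assert(!info.last_out && !info.prev_set && !info.last_set);
  assert(info.last_flag == 0);
  return 0;
}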
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index d1576dd5b791..73abf07018f3 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -23,8 +23,7 @@ static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
return;
kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
- // TODO: temporarily disable assertion until the bug with dependences is fixed
- // KMP_DEBUG_ASSERT(n >= 0);
+ KMP_DEBUG_ASSERT(n >= 0);
if (n == 0) {
KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 6c3e2c95cb5a..55e9c307638a 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1441,6 +1441,7 @@ kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
if (__kmp_enable_hidden_helper) {
auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
input_flags.hidden_helper = TRUE;
+ input_flags.tiedness = TASK_UNTIED;
}
return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,