author    | Dimitry Andric <dim@FreeBSD.org> | 2024-01-03 16:57:07 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-03 16:57:07 +0000
commit    | 77dbea07356e1ab2f37a777d4d1ddc5dd3e301c2 (patch)
tree      | bdb0bc8db7a91e1f8b4bb8729fc391e2adf45380
parent    | 99aabd70801bd4bc72c4942747f6d62c675112f5 (diff)
Vendor import of llvm-project main llvmorg-18-init-16003-gfc5f51cf5af4.
324 files changed, 5050 insertions, 3695 deletions
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index b3ae66e6e769..1afa69367286 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2383,10 +2383,6 @@ public:
   /// Check if the type is the CUDA device builtin texture type.
   bool isCUDADeviceBuiltinTextureType() const;
 
-  bool isRVVType(unsigned ElementCount) const;
-
-  bool isRVVType(unsigned Bitwidth, bool IsFloat, bool IsBFloat = false) const;
-
   /// Return the implicit lifetime for this type, which must not be dependent.
   Qualifiers::ObjCLifetime getObjCARCImplicitLifetime() const;
 
@@ -7283,28 +7279,6 @@ inline bool Type::isOpenCLSpecificType() const {
          isQueueT() || isReserveIDT() || isPipeType() || isOCLExtOpaqueType();
 }
 
-inline bool Type::isRVVType(unsigned ElementCount) const {
-  bool Ret = false;
-#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,  \
-                        IsFP, IsBF)                                           \
-  if (NumEls == ElementCount)                                                 \
-    Ret |= isSpecificBuiltinType(BuiltinType::Id);
-#include "clang/Basic/RISCVVTypes.def"
-  return Ret;
-}
-
-inline bool Type::isRVVType(unsigned Bitwidth, bool IsFloat,
-                            bool IsBFloat) const {
-  bool Ret = false;
-#define RVV_TYPE(Name, Id, SingletonId)
-#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,  \
-                        IsFP, IsBF)                                           \
-  if (ElBits == Bitwidth && IsFloat == IsFP && IsBFloat == IsBF)              \
-    Ret |= isSpecificBuiltinType(BuiltinType::Id);
-#include "clang/Basic/RISCVVTypes.def"
-  return Ret;
-}
-
 inline bool Type::isTemplateTypeParmType() const {
   return isa<TemplateTypeParmType>(CanonicalType);
 }
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 82a1ba3c82ad..31ec84143f65 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -68,6 +68,9 @@ TARGET_BUILTIN(__builtin_arm_ldg, "v*v*", "t", "mte")
 TARGET_BUILTIN(__builtin_arm_stg, "vv*", "t", "mte")
 TARGET_BUILTIN(__builtin_arm_subp, "Uiv*v*", "t", "mte")
 
+// SME state function
+BUILTIN(__builtin_arm_get_sme_state, "vULi*ULi*", "n")
+
 // Memory Operations
 TARGET_BUILTIN(__builtin_arm_mops_memset_tag, "v*v*iz", "", "mte,mops")
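The new __builtin_arm_get_sme_state builtin declared above takes two pointers to unsigned long and fills them with the two 64-bit results of the __arm_sme_state support routine (see the CGBuiltin.cpp hunk further down). A minimal usage sketch, assuming an AArch64 target with SME support; the variable names are illustrative only:

    // Sketch: read the two 64-bit values produced by __arm_sme_state.
    unsigned long State0, State1;               // names are hypothetical
    __builtin_arm_get_sme_state(&State0, &State1);
    // State0/State1 now hold the results the runtime returned in x0/x1.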
diff --git a/clang/include/clang/Basic/riscv_sifive_vector.td b/clang/include/clang/Basic/riscv_sifive_vector.td
index bb54e2664186..0d471f6c554c 100644
--- a/clang/include/clang/Basic/riscv_sifive_vector.td
+++ b/clang/include/clang/Basic/riscv_sifive_vector.td
@@ -109,10 +109,10 @@ multiclass RVVVFWMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
       Name = NAME,
       HasMasked = false,
       Log2LMUL = [-2, -1, 0, 1, 2] in
-    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "b", suffixes_prototypes>;
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "y", suffixes_prototypes>;
 }
 
-multiclass RVVVQMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
+multiclass RVVVQMACCDODBuiltinSet<list<list<string>> suffixes_prototypes> {
   let OverloadedName = NAME,
       Name = NAME,
       HasMasked = false,
@@ -120,6 +120,14 @@ multiclass RVVVQMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
     defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "i", suffixes_prototypes>;
 }
 
+multiclass RVVVQMACCQOQBuiltinSet<list<list<string>> suffixes_prototypes> {
+  let OverloadedName = NAME,
+      Name = NAME,
+      HasMasked = false,
+      Log2LMUL = [-1, 0, 1, 2] in
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "s", suffixes_prototypes>;
+}
+
 multiclass RVVVFNRCLIPBuiltinSet<string suffix, string prototype,
                                  string type_range> {
   let Log2LMUL = [-3, -2, -1, 0, 1, 2],
       Name = NAME,
@@ -130,18 +138,18 @@ multiclass RVVVFNRCLIPBuiltinSet<string suffix, string prototype, string type_ra
 let UnMaskedPolicyScheme = HasPolicyOperand in
   let RequiredFeatures = ["Xsfvqmaccdod"] in {
-    defm sf_vqmaccu_2x8x2 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)SUv(FixedSEW:8)Uv"]]>;
-    defm sf_vqmacc_2x8x2 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)Sv(FixedSEW:8)v"]]>;
-    defm sf_vqmaccus_2x8x2 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)SUv(FixedSEW:8)v"]]>;
-    defm sf_vqmaccsu_2x8x2 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)Sv(FixedSEW:8)Uv"]]>;
+    defm sf_vqmaccu_2x8x2 : RVVVQMACCDODBuiltinSet<[["", "v", "vv(FixedSEW:8)SUv(FixedSEW:8)Uv"]]>;
+    defm sf_vqmacc_2x8x2 : RVVVQMACCDODBuiltinSet<[["", "v", "vv(FixedSEW:8)Sv(FixedSEW:8)v"]]>;
+    defm sf_vqmaccus_2x8x2 : RVVVQMACCDODBuiltinSet<[["", "v", "vv(FixedSEW:8)SUv(FixedSEW:8)v"]]>;
+    defm sf_vqmaccsu_2x8x2 : RVVVQMACCDODBuiltinSet<[["", "v", "vv(FixedSEW:8)Sv(FixedSEW:8)Uv"]]>;
 }
 
 let UnMaskedPolicyScheme = HasPolicyOperand in
   let RequiredFeatures = ["Xsfvqmaccqoq"] in {
-    defm sf_vqmaccu_4x8x4 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)SUv(FixedSEW:8)Uv"]]>;
-    defm sf_vqmacc_4x8x4 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)Sv(FixedSEW:8)v"]]>;
-    defm sf_vqmaccus_4x8x4 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)SUv(FixedSEW:8)v"]]>;
-    defm sf_vqmaccsu_4x8x4 : RVVVQMACCBuiltinSet<[["", "v", "vv(FixedSEW:8)Sv(FixedSEW:8)Uv"]]>;
+    defm sf_vqmaccu_4x8x4 : RVVVQMACCQOQBuiltinSet<[["", "w", "ww(FixedSEW:8)SUv(FixedSEW:8)Uv"]]>;
+    defm sf_vqmacc_4x8x4 : RVVVQMACCQOQBuiltinSet<[["", "w", "ww(FixedSEW:8)Sv(FixedSEW:8)v"]]>;
+    defm sf_vqmaccus_4x8x4 : RVVVQMACCQOQBuiltinSet<[["", "w", "ww(FixedSEW:8)SUv(FixedSEW:8)v"]]>;
+    defm sf_vqmaccsu_4x8x4 : RVVVQMACCQOQBuiltinSet<[["", "w", "ww(FixedSEW:8)Sv(FixedSEW:8)Uv"]]>;
 }
 
 let UnMaskedPolicyScheme = HasPolicyOperand in
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index f2dde7f540fb..e7d78b03511f 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -2441,11 +2441,9 @@ let HasMasked = false, HasVL = false, IRName = "" in {
         return Builder.CreateInsertVector(ResultType, Ops[0], Ops[2], Ops[1]);
       }
   }] in {
-    let Log2LMUL = [0, 1, 2] in {
-      foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in {
-        def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">;
-        def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">;
-      }
+    foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in {
+      def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">;
+      def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">;
     }
     foreach nf = NFList in {
       defvar T = "(Tuple:" # nf # ")";
diff --git a/clang/include/clang/Basic/riscv_vector_common.td b/clang/include/clang/Basic/riscv_vector_common.td
index 4036ce8e6903..040db6f0cdbf 100644
--- a/clang/include/clang/Basic/riscv_vector_common.td
+++ b/clang/include/clang/Basic/riscv_vector_common.td
@@ -41,7 +41,7 @@
 //   x: float16_t (half)
 //   f: float32_t (float)
 //   d: float64_t (double)
-//   b: bfloat16_t (bfloat16)
+//   y: bfloat16_t (bfloat16)
 //
 // This way, given an LMUL, a record with a TypeRange "sil" will cause the
 // definition of 3 builtins. Each type "t" in the TypeRange (in this example
diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
index 5d2c96e5bc9d..45187433c069 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
@@ -13,6 +13,7 @@
 namespace clang {
 namespace ento {
 namespace categories {
+extern const char *const AppleAPIMisuse;
 extern const char *const CoreFoundationObjectiveC;
 extern const char *const LogicError;
 extern const char *const MemoryRefCount;
diff --git a/clang/include/clang/StaticAnalyzer/Core/Checker.h b/clang/include/clang/StaticAnalyzer/Core/Checker.h
index 8a46282a595e..2ec54a837c42 100644
--- a/clang/include/clang/StaticAnalyzer/Core/Checker.h
+++ b/clang/include/clang/StaticAnalyzer/Core/Checker.h
@@ -193,9 +193,8 @@ public:
 
 class Location {
   template <typename CHECKER>
-  static void _checkLocation(void *checker,
-                             const SVal &location, bool isLoad, const Stmt *S,
-                             CheckerContext &C) {
+  static void _checkLocation(void *checker, SVal location, bool isLoad,
+                             const Stmt *S, CheckerContext &C) {
     ((const CHECKER *)checker)->checkLocation(location, isLoad, S, C);
   }
 
@@ -209,8 +208,7 @@ public:
 
 class Bind {
   template <typename CHECKER>
-  static void _checkBind(void *checker,
-                         const SVal &location, const SVal &val, const Stmt *S,
+  static void _checkBind(void *checker, SVal location, SVal val, const Stmt *S,
                          CheckerContext &C) {
     ((const CHECKER *)checker)->checkBind(location, val, S, C);
   }
 
@@ -456,10 +454,8 @@ namespace eval {
 
 class Assume {
   template <typename CHECKER>
-  static ProgramStateRef _evalAssume(void *checker,
-                                     ProgramStateRef state,
-                                     const SVal &cond,
-                                     bool assumption) {
+  static ProgramStateRef _evalAssume(void *checker, ProgramStateRef state,
+                                     SVal cond, bool assumption) {
     return ((const CHECKER *)checker)->evalAssume(state, cond, assumption);
   }
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 39583c443eda..a45ba1bc573e 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -488,13 +488,11 @@ public:
   using CheckCallFunc =
       CheckerFn<void (const CallEvent &, CheckerContext &)>;
 
-  using CheckLocationFunc =
-      CheckerFn<void (const SVal &location, bool isLoad, const Stmt *S,
-                      CheckerContext &)>;
+  using CheckLocationFunc = CheckerFn<void(SVal location, bool isLoad,
+                                           const Stmt *S, CheckerContext &)>;
 
   using CheckBindFunc =
-      CheckerFn<void (const SVal &location, const SVal &val, const Stmt *S,
-                      CheckerContext &)>;
+      CheckerFn<void(SVal location, SVal val, const Stmt *S, CheckerContext &)>;
 
   using CheckEndAnalysisFunc =
       CheckerFn<void (ExplodedGraph &, BugReporter &, ExprEngine &)>;
 
@@ -530,8 +528,7 @@ public:
                                    RegionAndSymbolInvalidationTraits *ITraits)>;
 
   using EvalAssumeFunc =
-      CheckerFn<ProgramStateRef (ProgramStateRef, const SVal &cond,
-                                 bool assumption)>;
+      CheckerFn<ProgramStateRef(ProgramStateRef, SVal cond, bool assumption)>;
 
   using EvalCallFunc = CheckerFn<bool (const CallEvent &, CheckerContext &)>;
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index a240d74d6342..828d4ea35526 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -37,7 +37,6 @@
 namespace clang {
 namespace interp {
 
-using APInt = llvm::APInt;
 using APSInt = llvm::APSInt;
 
 /// Convert a value to an APValue.
diff --git a/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/clang/lib/ASTMatchers/Dynamic/Parser.cpp
index 27096a83b8dd..6a16c2184fcf 100644
--- a/clang/lib/ASTMatchers/Dynamic/Parser.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Parser.cpp
@@ -299,10 +299,8 @@ private:
 
   /// Consume all leading whitespace from \c Code.
   void consumeWhitespace() {
-    Code = Code.drop_while([](char c) {
-      // Don't trim newlines.
-      return StringRef(" \t\v\f\r").contains(c);
-    });
+    // Don't trim newlines.
+    Code = Code.ltrim(" \t\v\f\r");
   }
 
   SourceLocation currentLocation() {
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index 5902c6dc3ce0..d0d8316385b4 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -628,8 +628,7 @@ ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
     return OMF_performSelector;
 
   // The other method families may begin with a prefix of underscores.
-  while (!name.empty() && name.front() == '_')
-    name = name.substr(1);
+  name = name.ltrim('_');
 
   if (name.empty()) return OMF_None;
   switch (name.front()) {
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 3ee39133fcee..2f8395cb8932 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1365,8 +1365,7 @@ bool AArch64TargetInfo::validateConstraintModifier(
     StringRef Constraint, char Modifier, unsigned Size,
     std::string &SuggestedModifier) const {
   // Strip off constraint modifiers.
-  while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
-    Constraint = Constraint.substr(1);
+  Constraint = Constraint.ltrim("=+&");
 
   switch (Constraint[0]) {
   default:
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 6e1842fc64e5..01f9e844da12 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1230,8 +1230,7 @@ bool ARMTargetInfo::validateConstraintModifier(
   bool isInOut = (Constraint[0] == '+');
 
   // Strip off constraint modifiers.
-  while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
-    Constraint = Constraint.substr(1);
+  Constraint = Constraint.ltrim("=+&");
 
   switch (Constraint[0]) {
   default:
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 685462961ee3..6bc57a83a2d5 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -416,8 +416,7 @@ static void handleFullArchString(StringRef FullArchStr,
   Features.push_back("__RISCV_TargetAttrNeedOverride");
   auto RII = llvm::RISCVISAInfo::parseArchString(
       FullArchStr, /* EnableExperimentalExtension */ true);
-  if (!RII) {
-    consumeError(RII.takeError());
+  if (llvm::errorToBool(RII.takeError())) {
     // Forward the invalid FullArchStr.
     Features.push_back("+" + FullArchStr.str());
   } else {
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index b97f88647fa4..3deaa19f8d4f 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1613,8 +1613,7 @@ bool X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap,
                                        StringRef Constraint,
                                        unsigned Size) const {
   // Strip off constraint modifiers.
-  while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&')
-    Constraint = Constraint.substr(1);
+  Constraint = Constraint.ltrim("=+&");
 
   return validateOperandSize(FeatureMap, Constraint, Size);
 }
diff --git a/clang/lib/Basic/Warnings.cpp b/clang/lib/Basic/Warnings.cpp
index bab1af4f03b6..92954cab6fb0 100644
--- a/clang/lib/Basic/Warnings.cpp
+++ b/clang/lib/Basic/Warnings.cpp
@@ -96,11 +96,7 @@ void clang::ProcessWarningOptions(DiagnosticsEngine &Diags,
 
     // Check to see if this warning starts with "no-", if so, this is a
     // negative form of the option.
-    bool isPositive = true;
-    if (Opt.starts_with("no-")) {
-      isPositive = false;
-      Opt = Opt.substr(3);
-    }
+    bool isPositive = !Opt.consume_front("no-");
 
     // Figure out how this option affects the warning.  If -Wfoo, map the
     // diagnostic to a warning, if -Wno-foo, map it to ignore.
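Several of the hunks above replace hand-rolled prefix-stripping loops with llvm::StringRef helpers. A minimal sketch of the two APIs involved (the values are illustrative):

    llvm::StringRef Opt = "no-unused-variable";
    // consume_front() drops the prefix and returns true only when it
    // matched, so starts_with() + substr() collapses into one call:
    bool isPositive = !Opt.consume_front("no-"); // false; Opt == "unused-variable"
    // ltrim() strips any leading run of the given characters:
    llvm::StringRef C = llvm::StringRef("=+&r").ltrim("=+&"); // "r"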
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5081062da286..f71dbf1729a1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10430,6 +10430,26 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
   }
 
+  if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
+    // Create call to __arm_sme_state and store the results to the two pointers.
+    CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
+        llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
+                                false),
+        "__arm_sme_state"));
+    auto Attrs =
+        AttributeList()
+            .addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible")
+            .addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved");
+    CI->setAttributes(Attrs);
+    CI->setCallingConv(
+        llvm::CallingConv::
+            AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
+    Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
+                        EmitPointerWithAlignment(E->getArg(0)));
+    return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
+                               EmitPointerWithAlignment(E->getArg(1)));
+  }
+
   if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
     assert((getContext().getTypeSize(E->getType()) == 32) &&
            "rbit of unusual size!");
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ed426098ac69..e362c9da51fe 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6406,13 +6406,11 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
   }
 }
 
-static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
-                                     llvm::AtomicOrdering AO, const Expr *X,
-                                     const Expr *V, const Expr *R,
-                                     const Expr *E, const Expr *D,
-                                     const Expr *CE, bool IsXBinopExpr,
-                                     bool IsPostfixUpdate, bool IsFailOnly,
-                                     SourceLocation Loc) {
+static void emitOMPAtomicCompareExpr(
+    CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
+    const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
+    const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
+    SourceLocation Loc) {
   llvm::OpenMPIRBuilder &OMPBuilder =
       CGF.CGM.getOpenMPRuntime().getOMPBuilder();
 
@@ -6477,13 +6475,21 @@ static void emitOMPAtomicCompareExpr(CodeGenFunction &CGF,
         R->getType().isVolatileQualified()};
   }
 
-  CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
-      CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
-      IsPostfixUpdate, IsFailOnly));
+  if (FailAO == llvm::AtomicOrdering::NotAtomic) {
+    // fail clause was not mentionend on the
+    // "#pragma omp atomic compare" construct.
+    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
+        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
+        IsPostfixUpdate, IsFailOnly));
+  } else
+    CGF.Builder.restoreIP(OMPBuilder.createAtomicCompare(
+        CGF.Builder, XOpVal, VOpVal, ROpVal, EVal, DVal, AO, Op, IsXBinopExpr,
+        IsPostfixUpdate, IsFailOnly, FailAO));
 }
 
 static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
-                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,
+                              llvm::AtomicOrdering AO,
+                              llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
                               const Expr *X, const Expr *V, const Expr *R,
                               const Expr *E, const Expr *UE, const Expr *D,
                               const Expr *CE, bool IsXLHSInRHSPart,
@@ -6504,12 +6510,8 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                              IsXLHSInRHSPart, Loc);
     break;
   case OMPC_compare: {
-    emitOMPAtomicCompareExpr(CGF, AO, X, V, R, E, D, CE, IsXLHSInRHSPart,
-                             IsPostfixUpdate, IsFailOnly, Loc);
-    break;
-  }
-  case OMPC_fail: {
-    //TODO
+    emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
+                             IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
     break;
   }
   default:
@@ -6519,6 +6521,8 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
 
 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
+  // Fail Memory Clause Ordering.
+  llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
   bool MemOrderingSpecified = false;
   if (S.getSingleClause<OMPSeqCstClause>()) {
     AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -6572,12 +6576,27 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
     }
   }
 
+  if (KindsEncountered.contains(OMPC_compare) &&
+      KindsEncountered.contains(OMPC_fail)) {
+    Kind = OMPC_compare;
+    const auto *FailClause = S.getSingleClause<OMPFailClause>();
+    if (FailClause) {
+      OpenMPClauseKind FailParameter = FailClause->getFailParameter();
+      if (FailParameter == llvm::omp::OMPC_relaxed)
+        FailAO = llvm::AtomicOrdering::Monotonic;
+      else if (FailParameter == llvm::omp::OMPC_acquire)
+        FailAO = llvm::AtomicOrdering::Acquire;
+      else if (FailParameter == llvm::omp::OMPC_seq_cst)
+        FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
+    }
+  }
+
   LexicalScope Scope(*this, S.getSourceRange());
   EmitStopPoint(S.getAssociatedStmt());
-  emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
-                    S.getR(), S.getExpr(), S.getUpdateExpr(), S.getD(),
-                    S.getCondExpr(), S.isXLHSInRHSPart(), S.isFailOnly(),
-                    S.getBeginLoc());
+  emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
+                    S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
+                    S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
+                    S.isFailOnly(), S.getBeginLoc());
 }
 
 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
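The hunks above wire the OpenMP 5.1 `fail` clause into codegen for `atomic compare`: the clause parameter (relaxed, acquire, or seq_cst) is mapped to the failure memory ordering passed to createAtomicCompare. A minimal sketch of source code this path handles, assuming -fopenmp and an OpenMP-5.1-capable compiler (variable names are illustrative):

    int x, e, d;
    // On compare failure, the read of x may use acquire ordering:
    #pragma omp atomic compare fail(acquire)
    if (x == e) { x = d; }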
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 753a8fd74fa6..f8038497d90a 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -1139,8 +1139,7 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) {
       // Strip off a leading diagnostic code if there is one.
       StringRef Msg = Err.getMessage();
-      if (Msg.starts_with("error: "))
-        Msg = Msg.substr(7);
+      Msg.consume_front("error: ");
       unsigned DiagID =
           CI.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0");
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 7b2c31139b0b..63b9a1fdb988 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -324,13 +324,6 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
     return ABIArgInfo::getDirect();
   }
 
-  // Pass 128-bit/256-bit vector values via vector registers directly.
-  if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) &&
-                              (getTarget().hasFeature("lsx"))) ||
-                             ((getContext().getTypeSize(Ty) == 256) &&
-                              getTarget().hasFeature("lasx"))))
-    return ABIArgInfo::getDirect();
-
   // Complex types for the *f or *d ABI must be passed directly rather than
   // using CoerceAndExpand.
   if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) {
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index ff95c899c5f3..9b2f2a374809 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -86,6 +86,7 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/RISCVISAInfo.h"
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -670,10 +671,15 @@ static llvm::Triple computeTargetTriple(const Driver &D,
     if (Args.hasArg(options::OPT_march_EQ) ||
         Args.hasArg(options::OPT_mcpu_EQ)) {
       StringRef ArchName = tools::riscv::getRISCVArch(Args, Target);
-      if (ArchName.starts_with_insensitive("rv32"))
-        Target.setArch(llvm::Triple::riscv32);
-      else if (ArchName.starts_with_insensitive("rv64"))
-        Target.setArch(llvm::Triple::riscv64);
+      auto ISAInfo = llvm::RISCVISAInfo::parseArchString(
+          ArchName, /*EnableExperimentalExtensions=*/true);
+      if (!llvm::errorToBool(ISAInfo.takeError())) {
+        unsigned XLen = (*ISAInfo)->getXLen();
+        if (XLen == 32)
+          Target.setArch(llvm::Triple::riscv32);
+        else if (XLen == 64)
+          Target.setArch(llvm::Triple::riscv64);
+      }
     }
   }
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index 25b43cefce6b..0717e3b813e1 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -167,13 +167,6 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple,
       Features.push_back("-relax");
   }
 
-  // GCC Compatibility: -mno-save-restore is default, unless -msave-restore is
-  // specified.
-  if (Args.hasFlag(options::OPT_msave_restore, options::OPT_mno_save_restore, false))
-    Features.push_back("+save-restore");
-  else
-    Features.push_back("-save-restore");
-
   // -mno-unaligned-access is default, unless -munaligned-access is specified.
   AddTargetFeature(Args, Features, options::OPT_munaligned_access,
                    options::OPT_mno_unaligned_access, "fast-unaligned-access");
@@ -222,10 +215,8 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) {
     auto ParseResult = llvm::RISCVISAInfo::parseArchString(
         Arch, /* EnableExperimentalExtension */ true);
-    if (!ParseResult)
-      // Ignore parsing error, just go 3rd step.
-      consumeError(ParseResult.takeError());
-    else
+    // Ignore parsing error, just go 3rd step.
+    if (!llvm::errorToBool(ParseResult.takeError()))
       return (*ParseResult)->computeDefaultABI();
 
   // 3. Choose a default based on the triple
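Several hunks in this import replace the `if (!Result) consumeError(Result.takeError())` pattern with llvm::errorToBool(), which consumes the Error and returns true exactly when it represents a failure. A minimal sketch of the idiom, with the Result name being illustrative:

    #include "llvm/Support/Error.h"
    auto Result = llvm::RISCVISAInfo::parseArchString(Arch, true);
    if (llvm::errorToBool(Result.takeError())) {
      // Parse failed; the error has already been consumed. Fall back.
    } else {
      // *Result is valid here.
    }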
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index fef0522aaf45..53e26a9f8e22 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -237,9 +237,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
       assert(Name.starts_with("m") && "Invalid feature name.");
       Name = Name.substr(1);
 
-      bool IsNegative = Name.starts_with("no-");
-      if (IsNegative)
-        Name = Name.substr(3);
+      bool IsNegative = Name.consume_front("no-");
 
 #ifndef NDEBUG
       assert(Name.starts_with("avx10.") && "Invalid AVX10 feature name.");
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 70dc7e54aca1..acfa11980506 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2072,12 +2072,9 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args,
     StringRef Arch = riscv::getRISCVArch(Args, Triple);
     auto ISAInfo = llvm::RISCVISAInfo::parseArchString(
         Arch, /*EnableExperimentalExtensions*/ true);
-    if (!ISAInfo) {
-      // Ignore parsing error.
-      consumeError(ISAInfo.takeError());
-    } else {
+    // Ignore parsing error.
+    if (!errorToBool(ISAInfo.takeError()))
       MinVLen = (*ISAInfo)->getMinVLen();
-    }
 
     // If the value is "zvl", use MinVLen from march. Otherwise, try to parse
     // as integer as long as we have a MinVLen.
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6eb0ed8f3fed..2340191ca97d 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1133,6 +1133,16 @@ static bool isWholeArchivePresent(const ArgList &Args) {
   return WholeArchiveActive;
 }
 
+/// Determine if driver is invoked to create a shared object library (-static)
+static bool isSharedLinkage(const ArgList &Args) {
+  return Args.hasArg(options::OPT_shared);
+}
+
+/// Determine if driver is invoked to create a static object library (-shared)
+static bool isStaticLinkage(const ArgList &Args) {
+  return Args.hasArg(options::OPT_static);
+}
+
 /// Add Fortran runtime libs for MSVC
 static void addFortranRuntimeLibsMSVC(const ArgList &Args,
                                       llvm::opt::ArgStringList &CmdArgs) {
@@ -1164,6 +1174,16 @@ static void addFortranRuntimeLibsMSVC(const ArgList &Args,
 // Add FortranMain runtime lib
 static void addFortranMain(const ToolChain &TC, const ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) {
+  // 0. Shared-library linkage
+  // If we are attempting to link a library, we should not add
+  // -lFortran_main.a to the link line, as the `main` symbol is not
+  // required for a library and should also be provided by one of
+  // the translation units of the code that this shared library
+  // will be linked against eventually.
+  if (isSharedLinkage(Args) || isStaticLinkage(Args)) {
+    return;
+  }
+
   // 1. MSVC
   if (TC.getTriple().isKnownWindowsMSVCEnvironment()) {
     addFortranRuntimeLibsMSVC(Args, CmdArgs);
@@ -2368,8 +2388,7 @@ static void GetSDLFromOffloadArchive(
         FoundAOB = true;
       }
     } else {
-      if (Lib.starts_with("-l"))
-        Lib = Lib.drop_front(2);
+      Lib.consume_front("-l");
       for (auto LPath : LibraryPaths) {
        ArchiveOfBundles.clear();
        auto LibFile = (Lib.starts_with(":") ? Lib.drop_front()
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 38361d6889a1..a610a94a39a2 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -1741,11 +1741,9 @@ selectRISCVMultilib(const MultilibSet &RISCVMultilibSet, StringRef Arch,
       llvm::RISCVISAInfo::parseArchString(
           Arch, /*EnableExperimentalExtension=*/true,
           /*ExperimentalExtensionVersionCheck=*/false);
-  if (!ParseResult) {
-    // Ignore any error here, we assume it will be handled in another place.
-    consumeError(ParseResult.takeError());
+  // Ignore any error here, we assume it will be handled in another place.
+  if (llvm::errorToBool(ParseResult.takeError()))
     return false;
-  }
 
   auto &ISAInfo = *ParseResult;
 
@@ -1780,10 +1778,8 @@ selectRISCVMultilib(const MultilibSet &RISCVMultilibSet, StringRef Arch,
         llvm::RISCVISAInfo::parseArchString(
             Flag, /*EnableExperimentalExtension=*/true,
             /*ExperimentalExtensionVersionCheck=*/false);
-    if (!MLConfigParseResult) {
-      // Ignore any error here, we assume it will handled in another place.
-      llvm::consumeError(MLConfigParseResult.takeError());
-
+    // Ignore any error here, we assume it will handled in another place.
+    if (llvm::errorToBool(MLConfigParseResult.takeError())) {
      // We might get a parsing error if rv32e in the list, we could just skip
      // that and process the rest of multi-lib configs.
      Skip = true;
diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp
index 9a9792d019d5..200ac46aa534 100644
--- a/clang/lib/Driver/ToolChains/Solaris.cpp
+++ b/clang/lib/Driver/ToolChains/Solaris.cpp
@@ -295,13 +295,12 @@ static StringRef getSolarisLibSuffix(const llvm::Triple &Triple) {
   switch (Triple.getArch()) {
   case llvm::Triple::x86:
   case llvm::Triple::sparc:
+  default:
     break;
   case llvm::Triple::x86_64:
     return "/amd64";
   case llvm::Triple::sparcv9:
     return "/sparcv9";
-  default:
-    llvm_unreachable("Unsupported architecture");
   }
   return "";
 }
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 8489a30dd34a..102504182c45 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -398,7 +398,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
   }
   if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
        (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
-        Style.isCpp() &&
+        State.Line->First->isNot(TT_AttributeSquare) && Style.isCpp() &&
         // FIXME: This is a temporary workaround for the case where clang-format
         // sets BreakBeforeParameter to avoid bin packing and this creates a
         // completely unnecessary line break after a template type that isn't
diff --git a/clang/lib/Format/MatchFilePath.cpp b/clang/lib/Format/MatchFilePath.cpp
index 412ee4954587..062b334dcdd8 100644
--- a/clang/lib/Format/MatchFilePath.cpp
+++ b/clang/lib/Format/MatchFilePath.cpp
@@ -19,8 +19,8 @@ using namespace llvm;
 namespace clang {
 namespace format {
 
-// Check whether `FilePath` matches `Pattern` based on POSIX (1003.1-2008)
-// 2.13.1, 2.13.2, and Rule 1 of 2.13.3.
+// Check whether `FilePath` matches `Pattern` based on POSIX 2.13.1, 2.13.2, and
+// Rule 1 of 2.13.3.
 bool matchFilePath(StringRef Pattern, StringRef FilePath) {
   assert(!Pattern.empty());
   assert(!FilePath.empty());
diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp
index e96669f856bb..b471471f3528 100644
--- a/clang/lib/Frontend/DependencyGraph.cpp
+++ b/clang/lib/Frontend/DependencyGraph.cpp
@@ -110,8 +110,7 @@ void DependencyGraphCallback::OutputGraphFile() {
     writeNodeReference(OS, AllFiles[I]);
     OS << " [ shape=\"box\", label=\"";
     StringRef FileName = AllFiles[I].getName();
-    if (FileName.starts_with(SysRoot))
-      FileName = FileName.substr(SysRoot.size());
+    FileName.consume_front(SysRoot);
     OS << DOT::EscapeString(std::string(FileName))
        << "\"];\n";
   }
diff --git a/clang/lib/Frontend/LayoutOverrideSource.cpp b/clang/lib/Frontend/LayoutOverrideSource.cpp
index f474d4fe8fdc..a1866ec09c9d 100644
--- a/clang/lib/Frontend/LayoutOverrideSource.cpp
+++ b/clang/lib/Frontend/LayoutOverrideSource.cpp
@@ -147,8 +147,7 @@ LayoutOverrideSource::LayoutOverrideSource(StringRef Filename) {
 
         // Skip over this offset, the following comma, and any spaces.
         LineStr = LineStr.substr(1);
-        while (!LineStr.empty() && isWhitespace(LineStr[0]))
-          LineStr = LineStr.substr(1);
+        LineStr = LineStr.drop_while(isWhitespace);
       }
     }
 
@@ -163,8 +162,7 @@ LayoutOverrideSource::LayoutOverrideSource(StringRef Filename) {
 
         // Skip over this offset, the following comma, and any spaces.
         LineStr = LineStr.substr(1);
-        while (!LineStr.empty() && isWhitespace(LineStr[0]))
-          LineStr = LineStr.substr(1);
+        LineStr = LineStr.drop_while(isWhitespace);
       }
       continue;
     }
@@ -180,8 +178,7 @@ LayoutOverrideSource::LayoutOverrideSource(StringRef Filename) {
 
       // Skip over this offset, the following comma, and any spaces.
       LineStr = LineStr.substr(1);
-      while (!LineStr.empty() && isWhitespace(LineStr[0]))
-        LineStr = LineStr.substr(1);
+      LineStr = LineStr.drop_while(isWhitespace);
     }
   }
 }
diff --git a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
index 09c1460d54e1..8a3d2286cd16 100644
--- a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -1144,8 +1144,7 @@ std::unique_ptr<Directive> Directive::create(bool RegexKind,
   std::string RegexStr;
   StringRef S = Text;
   while (!S.empty()) {
-    if (S.starts_with("{{")) {
-      S = S.drop_front(2);
+    if (S.consume_front("{{")) {
       size_t RegexMatchLength = S.find("}}");
       assert(RegexMatchLength != StringRef::npos);
       // Append the regex, enclosed in parentheses.
diff --git a/clang/lib/Headers/usermsrintrin.h b/clang/lib/Headers/usermsrintrin.h
index 6d1424ad3b2e..61388376706d 100644
--- a/clang/lib/Headers/usermsrintrin.h
+++ b/clang/lib/Headers/usermsrintrin.h
@@ -14,12 +14,33 @@
 #define __USERMSRINTRIN_H
 #ifdef __x86_64__
 
+/// Reads the contents of a 64-bit MSR specified in \a __A into \a dst.
+///
+/// This intrinsic corresponds to the <c> URDMSR </c> instruction.
+/// \param __A
+///    An unsigned long long.
+///
+/// \code{.operation}
+/// DEST := MSR[__A]
+/// \endcode
 static __inline__ unsigned long long
     __attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
     _urdmsr(unsigned long long __A) {
   return __builtin_ia32_urdmsr(__A);
 }
 
+/// Writes the contents of \a __B into the 64-bit MSR specified in \a __A.
+///
+/// This intrinsic corresponds to the <c> UWRMSR </c> instruction.
+///
+/// \param __A
+///    An unsigned long long.
+/// \param __B
+///    An unsigned long long.
+///
+/// \code{.operation}
+/// MSR[__A] := __B
+/// \endcode
 static __inline__ void
     __attribute__((__always_inline__, __nodebug__, __target__("usermsr")))
     _uwrmsr(unsigned long long __A, unsigned long long __B) {
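A brief usage sketch for the newly documented intrinsics, assuming a CPU with the USERMSR feature, a build with -musermsr so that <immintrin.h> exposes the header, and a user-readable MSR; the index 0x1b0 is purely illustrative:

    #include <immintrin.h>
    unsigned long long Old = _urdmsr(0x1b0ULL);  // DEST := MSR[0x1b0]
    _uwrmsr(0x1b0ULL, Old | 1ULL);               // MSR[0x1b0] := Old | 1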
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2a69325f0295..da0570b7b0f1 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1219,8 +1219,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
     if (IsChkVariant) {
       FunctionName = FunctionName.drop_front(std::strlen("__builtin___"));
       FunctionName = FunctionName.drop_back(std::strlen("_chk"));
-    } else if (FunctionName.starts_with("__builtin_")) {
-      FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
+    } else {
+      FunctionName.consume_front("__builtin_");
     }
     return FunctionName;
   };
@@ -5388,15 +5388,17 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI,
     QualType Op1Type = TheCall->getArg(0)->getType();
     QualType Op2Type = TheCall->getArg(1)->getType();
     QualType Op3Type = TheCall->getArg(2)->getType();
-    uint64_t ElemSize = Op1Type->isRVVType(32, false) ? 32 : 64;
+    ASTContext::BuiltinVectorTypeInfo Info =
+        Context.getBuiltinVectorTypeInfo(Op1Type->castAs<BuiltinType>());
+    uint64_t ElemSize = Context.getTypeSize(Info.ElementType);
     if (ElemSize == 64 && !TI.hasFeature("zvknhb"))
       return Diag(TheCall->getBeginLoc(),
-                  diag::err_riscv_type_requires_extension)
-             << Op1Type << "zvknhb";
+                  diag::err_riscv_builtin_requires_extension)
+             << /* IsExtension */ true << TheCall->getSourceRange() << "zvknb";
 
-    return CheckInvalidVLENandLMUL(TI, TheCall, *this, Op1Type, ElemSize << 2) ||
-           CheckInvalidVLENandLMUL(TI, TheCall, *this, Op2Type, ElemSize << 2) ||
-           CheckInvalidVLENandLMUL(TI, TheCall, *this, Op3Type, ElemSize << 2);
+    return CheckInvalidVLENandLMUL(TI, TheCall, *this, Op1Type, ElemSize * 4) ||
+           CheckInvalidVLENandLMUL(TI, TheCall, *this, Op2Type, ElemSize * 4) ||
+           CheckInvalidVLENandLMUL(TI, TheCall, *this, Op3Type, ElemSize * 4);
   }
 
   case RISCVVector::BI__builtin_rvv_sf_vc_i_se_u8mf8:
@@ -6170,30 +6172,33 @@ bool Sema::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI,
 
 void Sema::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D) {
   const TargetInfo &TI = Context.getTargetInfo();
+
+  ASTContext::BuiltinVectorTypeInfo Info =
+      Context.getBuiltinVectorTypeInfo(Ty->castAs<BuiltinType>());
+  unsigned EltSize = Context.getTypeSize(Info.ElementType);
+  unsigned MinElts = Info.EC.getKnownMinValue();
+
   // (ELEN, LMUL) pairs of (8, mf8), (16, mf4), (32, mf2), (64, m1) requires at
   // least zve64x
-  if ((Ty->isRVVType(/* Bitwidth */ 64, /* IsFloat */ false) ||
-       Ty->isRVVType(/* ElementCount */ 1)) &&
+  if (((EltSize == 64 && Info.ElementType->isIntegerType()) || MinElts == 1) &&
       !TI.hasFeature("zve64x"))
     Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64x";
-  if (Ty->isRVVType(/* Bitwidth */ 16, /* IsFloat */ true) &&
-      !TI.hasFeature("zvfh") && !TI.hasFeature("zvfhmin"))
+  else if (Info.ElementType->isFloat16Type() && !TI.hasFeature("zvfh") &&
+           !TI.hasFeature("zvfhmin"))
     Diag(Loc, diag::err_riscv_type_requires_extension, D)
         << Ty << "zvfh or zvfhmin";
-  // Check if enabled zvfbfmin for BFloat16
-  if (Ty->isRVVType(/* Bitwidth */ 16, /* IsFloat */ false,
-                    /* IsBFloat */ true) &&
-      !TI.hasFeature("experimental-zvfbfmin"))
+  else if (Info.ElementType->isBFloat16Type() &&
+           !TI.hasFeature("experimental-zvfbfmin"))
     Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zvfbfmin";
-  if (Ty->isRVVType(/* Bitwidth */ 32, /* IsFloat */ true) &&
-      !TI.hasFeature("zve32f"))
+  else if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Float) &&
+           !TI.hasFeature("zve32f"))
     Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32f";
-  if (Ty->isRVVType(/* Bitwidth */ 64, /* IsFloat */ true) &&
-      !TI.hasFeature("zve64d"))
+  else if (Info.ElementType->isSpecificBuiltinType(BuiltinType::Double) &&
+           !TI.hasFeature("zve64d"))
    Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve64d";
   // Given that caller already checked isRVVType() before calling this function,
   // if we don't have at least zve32x supported, then we need to emit error.
-  if (!TI.hasFeature("zve32x"))
+  else if (!TI.hasFeature("zve32x"))
     Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32x";
 }
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index af8b90ecfed9..4a385a396fa6 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5825,8 +5825,7 @@ struct IntrinToName {
 static bool ArmBuiltinAliasValid(unsigned BuiltinID, StringRef AliasName,
                                  ArrayRef<IntrinToName> Map,
                                  const char *IntrinNames) {
-  if (AliasName.starts_with("__arm_"))
-    AliasName = AliasName.substr(6);
+  AliasName.consume_front("__arm_");
   const IntrinToName *It =
       llvm::lower_bound(Map, BuiltinID, [](const IntrinToName &L, unsigned Id) {
         return L.Id < Id;
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 61d244f3bb97..cc9db5ded114 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -5512,6 +5512,14 @@ static void TryOrBuildParenListInitialization(
   } else if (auto *RT = Entity.getType()->getAs<RecordType>()) {
     bool IsUnion = RT->isUnionType();
     const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
+    if (RD->isInvalidDecl()) {
+      // Exit early to avoid confusion when processing members.
+      // We do the same for braced list initialization in
+      // `CheckStructUnionTypes`.
+      Sequence.SetFailed(
+          clang::InitializationSequence::FK_ParenthesizedListInitFailed);
+      return;
+    }
 
     if (!IsUnion) {
       for (const CXXBaseSpecifier &Base : RD->bases()) {
diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index db0cbd5ec6d6..ed7f626971f3 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -529,7 +529,8 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
   if (!Mod)
     return true;
 
-  if (!Mod->isInterfaceOrPartition() && !ModuleName.empty()) {
+  if (!Mod->isInterfaceOrPartition() && !ModuleName.empty() &&
+      !getLangOpts().ObjC) {
     Diag(ImportLoc, diag::err_module_import_non_interface_nor_parition)
         << ModuleName;
     return true;
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 3826994ef212..f34d2959dc61 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -12683,7 +12683,7 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef<OMPClause *> Clauses,
       break;
     }
     case OMPC_fail: {
-      if (AtomicKind != OMPC_compare) {
+      if (!EncounteredAtomicKinds.contains(OMPC_compare)) {
        Diag(C->getBeginLoc(), diag::err_omp_atomic_fail_no_compare)
            << SourceRange(C->getBeginLoc(), C->getEndLoc());
        return StmtError();
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index d989707d5575..547eb77930b4 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1706,10 +1706,10 @@ void ASTDeclReader::VisitImplicitParamDecl(ImplicitParamDecl *PD) {
 
 void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) {
   VisitVarDecl(PD);
 
+  unsigned scopeIndex = Record.readInt();
   BitsUnpacker ParmVarDeclBits(Record.readInt());
   unsigned isObjCMethodParam = ParmVarDeclBits.getNextBit();
   unsigned scopeDepth = ParmVarDeclBits.getNextBits(/*Width=*/7);
-  unsigned scopeIndex = ParmVarDeclBits.getNextBits(/*Width=*/8);
   unsigned declQualifier = ParmVarDeclBits.getNextBits(/*Width=*/7);
   if (isObjCMethodParam) {
     assert(scopeDepth == 0);
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 2554abc682a1..9e3299f04918 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1163,10 +1163,14 @@ void ASTDeclWriter::VisitImplicitParamDecl(ImplicitParamDecl *D) {
 void ASTDeclWriter::VisitParmVarDecl(ParmVarDecl *D) {
   VisitVarDecl(D);
 
+  // See the implementation of `ParmVarDecl::getParameterIndex()`, which may
+  // exceed the size of the normal bitfield. So it may be better to not pack
+  // these bits.
+  Record.push_back(D->getFunctionScopeIndex());
+
   BitsPacker ParmVarDeclBits;
   ParmVarDeclBits.addBit(D->isObjCMethodParameter());
   ParmVarDeclBits.addBits(D->getFunctionScopeDepth(), /*BitsWidth=*/7);
-  ParmVarDeclBits.addBits(D->getFunctionScopeIndex(), /*BitsWidth=*/8);
   // FIXME: stable encoding
   ParmVarDeclBits.addBits(D->getObjCDeclQualifier(), /*BitsWidth=*/7);
   ParmVarDeclBits.addBit(D->isKNRPromoted());
@@ -2350,10 +2354,11 @@ void ASTWriter::WriteDeclAbbrevs() {
   //                isARCPseudoStrong, Linkage, ModulesCodegen
   Abv->Add(BitCodeAbbrevOp(0));                       // VarKind (local enum)
   // ParmVarDecl
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ScopeIndex
   Abv->Add(BitCodeAbbrevOp(
       BitCodeAbbrevOp::Fixed,
-      27)); // Packed Parm Var Decl bits: IsObjCMethodParameter, ScopeDepth,
-            // ScopeIndex, ObjCDeclQualifier, KNRPromoted,
+      19)); // Packed Parm Var Decl bits: IsObjCMethodParameter, ScopeDepth,
+            // ObjCDeclQualifier, KNRPromoted,
             // HasInheritedDefaultArg, HasUninstantiatedDefaultArg
   // Type Source Info
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index ce1265412655..c990ad138f89 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -25,7 +25,7 @@ using namespace ento;
 
 namespace {
 class ArrayBoundChecker :
     public Checker<check::Location> {
-  mutable std::unique_ptr<BugType> BT;
+  const BugType BT{this, "Out-of-bound array access"};
 
 public:
   void checkLocation(SVal l, bool isLoad, const Stmt* S,
@@ -65,16 +65,13 @@ void ArrayBoundChecker::checkLocation(SVal l, bool isLoad, const Stmt* LoadS,
   if (!N)
     return;
 
-  if (!BT)
-    BT.reset(new BugType(this, "Out-of-bound array access"));
-
   // FIXME: It would be nice to eventually make this diagnostic more clear,
   // e.g., by referencing the original declaration or by saying *why* this
   // reference is outside the range.
 
   // Generate a report for this bug.
   auto report = std::make_unique<PathSensitiveBugReport>(
-      *BT, "Access out-of-bound array element (buffer overflow)", N);
+      BT, "Access out-of-bound array element (buffer overflow)", N);
 
   report->addRange(LoadS->getSourceRange());
   C.emitReport(std::move(report));
diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index 5e25153a148f..c72a97cc01e9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -44,7 +44,7 @@ namespace {
 class APIMisuse : public BugType {
 public:
   APIMisuse(const CheckerBase *checker, const char *name)
-      : BugType(checker, name, "API Misuse (Apple)") {}
+      : BugType(checker, name, categories::AppleAPIMisuse) {}
 };
 } // end anonymous namespace
diff --git a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
index 76f091562cd5..66e080adb138 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
@@ -25,20 +25,31 @@ using namespace clang;
 using namespace ento;
 
 namespace {
 class BlockInCriticalSectionChecker : public Checker<check::PostCall> {
-
-  mutable IdentifierInfo *IILockGuard, *IIUniqueLock;
-
-  CallDescription LockFn, UnlockFn, SleepFn, GetcFn, FgetsFn, ReadFn, RecvFn,
-      PthreadLockFn, PthreadTryLockFn, PthreadUnlockFn,
-      MtxLock, MtxTimedLock, MtxTryLock, MtxUnlock;
-
-  StringRef ClassLockGuard, ClassUniqueLock;
-
-  mutable bool IdentifierInfoInitialized;
-
-  std::unique_ptr<BugType> BlockInCritSectionBugType;
+  mutable IdentifierInfo *IILockGuard = nullptr;
+  mutable IdentifierInfo *IIUniqueLock = nullptr;
+  mutable bool IdentifierInfoInitialized = false;
+
+  const CallDescription LockFn{{"lock"}};
+  const CallDescription UnlockFn{{"unlock"}};
+  const CallDescription SleepFn{{"sleep"}};
+  const CallDescription GetcFn{{"getc"}};
+  const CallDescription FgetsFn{{"fgets"}};
+  const CallDescription ReadFn{{"read"}};
+  const CallDescription RecvFn{{"recv"}};
+  const CallDescription PthreadLockFn{{"pthread_mutex_lock"}};
+  const CallDescription PthreadTryLockFn{{"pthread_mutex_trylock"}};
+  const CallDescription PthreadUnlockFn{{"pthread_mutex_unlock"}};
+  const CallDescription MtxLock{{"mtx_lock"}};
+  const CallDescription MtxTimedLock{{"mtx_timedlock"}};
+  const CallDescription MtxTryLock{{"mtx_trylock"}};
+  const CallDescription MtxUnlock{{"mtx_unlock"}};
+
+  const llvm::StringLiteral ClassLockGuard{"lock_guard"};
+  const llvm::StringLiteral ClassUniqueLock{"unique_lock"};
+
+  const BugType BlockInCritSectionBugType{
+      this, "Call to blocking function in critical section", "Blocking Error"};
 
   void initIdentifierInfo(ASTContext &Ctx) const;
 
@@ -47,8 +58,6 @@ class BlockInCriticalSectionChecker : public Checker<check::PostCall> {
                                 CheckerContext &C) const;
 
 public:
-  BlockInCriticalSectionChecker();
-
   bool isBlockingFunction(const CallEvent &Call) const;
   bool isLockFunction(const CallEvent &Call) const;
   bool isUnlockFunction(const CallEvent &Call) const;
@@ -63,22 +72,6 @@ public:
 
 REGISTER_TRAIT_WITH_PROGRAMSTATE(MutexCounter, unsigned)
 
-BlockInCriticalSectionChecker::BlockInCriticalSectionChecker()
-    : IILockGuard(nullptr), IIUniqueLock(nullptr), LockFn({"lock"}),
-      UnlockFn({"unlock"}), SleepFn({"sleep"}), GetcFn({"getc"}),
-      FgetsFn({"fgets"}), ReadFn({"read"}), RecvFn({"recv"}),
-      PthreadLockFn({"pthread_mutex_lock"}),
-      PthreadTryLockFn({"pthread_mutex_trylock"}),
-      PthreadUnlockFn({"pthread_mutex_unlock"}), MtxLock({"mtx_lock"}),
-      MtxTimedLock({"mtx_timedlock"}), MtxTryLock({"mtx_trylock"}),
-      MtxUnlock({"mtx_unlock"}), ClassLockGuard("lock_guard"),
-      ClassUniqueLock("unique_lock"), IdentifierInfoInitialized(false) {
-  // Initialize the bug type.
-  BlockInCritSectionBugType.reset(
-      new BugType(this, "Call to blocking function in critical section",
-                  "Blocking Error"));
-}
-
 void BlockInCriticalSectionChecker::initIdentifierInfo(ASTContext &Ctx) const {
   if (!IdentifierInfoInitialized) {
     /* In case of checking C code, or when the corresponding headers are not
@@ -151,7 +144,7 @@ void BlockInCriticalSectionChecker::reportBlockInCritSection(
   llvm::raw_string_ostream os(msg);
   os << "Call to blocking function '" << Call.getCalleeIdentifier()->getName()
      << "' inside of critical section";
-  auto R = std::make_unique<PathSensitiveBugReport>(*BlockInCritSectionBugType,
+  auto R = std::make_unique<PathSensitiveBugReport>(BlockInCritSectionBugType,
                                                     os.str(), ErrNode);
   R->addRange(Call.getSourceRange());
   R->markInteresting(BlockDescSym);
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
index 361a4eed9221..a09db6d2d0ec 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
@@ -24,7 +24,7 @@ using namespace ento;
 
 namespace {
   class BoolAssignmentChecker : public Checker< check::Bind > {
-    mutable std::unique_ptr<BugType> BT;
+    const BugType BT{this, "Assignment of a non-Boolean value"};
     void emitReport(ProgramStateRef state, CheckerContext &C,
                     bool IsTainted = false) const;
 
@@ -36,12 +36,9 @@ namespace {
 void BoolAssignmentChecker::emitReport(ProgramStateRef state, CheckerContext &C,
                                        bool IsTainted) const {
   if (ExplodedNode *N = C.generateNonFatalErrorNode(state)) {
-    if (!BT)
-      BT.reset(new BugType(this, "Assignment of a non-Boolean value"));
-
     StringRef Msg = IsTainted ? "Might assign a tainted non-Boolean value"
                               : "Assignment of a non-Boolean value";
-    C.emitReport(std::make_unique<PathSensitiveBugReport>(*BT, Msg, N));
+    C.emitReport(std::make_unique<PathSensitiveBugReport>(BT, Msg, N));
   }
 }
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 31f5b03dcdeb..b7b64c3da4f6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -121,7 +121,7 @@ public:
                                        const CallEvent *Call) const;
 
   using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
-                                     const CallExpr *)>;
+                                     const CallEvent &)>;
 
   CallDescriptionMap<FnCheck> Callbacks = {
       {{CDF_MaybeBuiltin, {"memcpy"}, 3},
@@ -173,56 +173,53 @@ public:
       StdCopyBackward{{"std", "copy_backward"}, 3};
 
   FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
-  void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
-  void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
-  void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
-  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
-  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
+  void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
+  void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
+  void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
+  void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
+  void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
                       ProgramStateRef state, SizeArgExpr Size,
                       DestinationArgExpr Dest, SourceArgExpr Source,
                       bool Restricted, bool IsMempcpy, CharKind CK) const;
-  void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
+  void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
 
-  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
-  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
-  void evalstrLengthCommon(CheckerContext &C,
-                           const CallExpr *CE,
+  void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
+  void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
+  void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
                            bool IsStrnlen = false) const;
-  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd,
-                        bool IsBounded, ConcatFnKind appendK,
+  void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
+  void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
+                        bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
                         bool returnPtr = true) const;
-  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
 
-  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
-  void evalStrcmpCommon(CheckerContext &C,
-                        const CallExpr *CE,
-                        bool IsBounded = false,
-                        bool IgnoreCase = false) const;
+  void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
+  void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
+                        bool IsBounded = false, bool IgnoreCase = false) const;
 
-  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
+  void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
 
-  void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
-  void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
-  void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
-  void evalMemset(CheckerContext &C, const CallExpr *CE) const;
-  void evalBzero(CheckerContext &C, const CallExpr *CE) const;
+  void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
+  void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
+  void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
+  void evalMemset(CheckerContext &C, const CallEvent &Call) const;
+  void evalBzero(CheckerContext &C, const CallEvent &Call) const;
 
-  void evalSprintf(CheckerContext &C, const CallExpr *CE) const;
-  void evalSnprintf(CheckerContext &C, const CallExpr *CE) const;
-  void evalSprintfCommon(CheckerContext &C, const CallExpr *CE, bool IsBounded,
-                         bool IsBuiltin) const;
+  void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
+  void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
+  void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
+                         bool IsBounded, bool IsBuiltin) const;
 
   // Utility methods
   std::pair<ProgramStateRef , ProgramStateRef >
@@ -1291,7 +1288,7 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
 // evaluation of individual function calls.
 //===----------------------------------------------------------------------===//
 
-void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
+void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
                                     ProgramStateRef state, SizeArgExpr Size,
                                     DestinationArgExpr Dest,
                                     SourceArgExpr Source, bool Restricted,
@@ -1313,7 +1310,8 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
   // If the size is zero, there won't be any actual memory access, so
   // just bind the return value to the destination buffer and return.
   if (stateZeroSize && !stateNonZeroSize) {
-    stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
+    stateZeroSize =
+        stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
     C.addTransition(stateZeroSize);
     return;
   }
@@ -1361,15 +1359,15 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
       // If we don't know how much we copied, we can at least
      // conjure a return value for later.
       if (lastElement.isUnknown())
-        lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
-                                                          C.blockCount());
+        lastElement = C.getSValBuilder().conjureSymbolVal(
+            nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
 
       // The byte after the last byte copied is the return value.
-      state = state->BindExpr(CE, LCtx, lastElement);
+      state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
     } else {
       // All other copies return the destination buffer.
       // (Well, bcopy() has a void return type, but this won't hurt.)
-      state = state->BindExpr(CE, LCtx, destVal);
+      state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
     }
 
     // Invalidate the destination (regular invalidation without pointer-escaping
@@ -1391,69 +1389,69 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
   }
 }
 
-void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE,
+void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
                                 CharKind CK) const {
   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
   // The return value is the address of the destination buffer.
-  DestinationArgExpr Dest = {{CE->getArg(0), 0}};
-  SourceArgExpr Src = {{CE->getArg(1), 1}};
-  SizeArgExpr Size = {{CE->getArg(2), 2}};
+  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
+  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
+  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
 
   ProgramStateRef State = C.getState();
 
   constexpr bool IsRestricted = true;
   constexpr bool IsMempcpy = false;
-  evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
+  evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
 }
 
-void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE,
+void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
                                  CharKind CK) const {
   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
   // The return value is a pointer to the byte following the last written byte.
-  DestinationArgExpr Dest = {{CE->getArg(0), 0}};
-  SourceArgExpr Src = {{CE->getArg(1), 1}};
-  SizeArgExpr Size = {{CE->getArg(2), 2}};
+  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
+  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
+  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
 
   constexpr bool IsRestricted = true;
   constexpr bool IsMempcpy = true;
-  evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
-                 CK);
+  evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
+                 IsMempcpy, CK);
 }
 
-void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE,
+void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
                                  CharKind CK) const {
   // void *memmove(void *dst, const void *src, size_t n);
   // The return value is the address of the destination buffer.
-  DestinationArgExpr Dest = {{CE->getArg(0), 0}};
-  SourceArgExpr Src = {{CE->getArg(1), 1}};
-  SizeArgExpr Size = {{CE->getArg(2), 2}};
+  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
+  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
+  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
 
   constexpr bool IsRestricted = false;
   constexpr bool IsMempcpy = false;
-  evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
-                 CK);
+  evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
+                 IsMempcpy, CK);
 }
 
-void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
+void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
   // void bcopy(const void *src, void *dst, size_t n);
-  SourceArgExpr Src{{CE->getArg(0), 0}};
-  DestinationArgExpr Dest = {{CE->getArg(1), 1}};
-  SizeArgExpr Size = {{CE->getArg(2), 2}};
+  SourceArgExpr Src{{Call.getArgExpr(0), 0}};
+  DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
+  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
 
   constexpr bool IsRestricted = false;
   constexpr bool IsMempcpy = false;
-  evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
-                 CharKind::Regular);
+  evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
+                 IsMempcpy, CharKind::Regular);
 }
 
-void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE,
+void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
                                 CharKind CK) const {
   // int memcmp(const void *s1, const void *s2, size_t n);
   CurrentFunctionDescription = "memory comparison function";
 
-  AnyArgExpr Left = {CE->getArg(0), 0};
-  AnyArgExpr Right = {CE->getArg(1), 1};
-  SizeArgExpr Size = {{CE->getArg(2), 2}};
+  AnyArgExpr Left = {Call.getArgExpr(0), 0};
+  AnyArgExpr Right = {Call.getArgExpr(1), 1};
+  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
 
   ProgramStateRef State = C.getState();
   SValBuilder &Builder = C.getSValBuilder();
@@ -1471,7 +1469,8 @@ void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE,
   // have to check either of the buffers.
   if (stateZeroSize) {
     State = stateZeroSize;
-    State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
+    State = State->BindExpr(Call.getOriginExpr(), LCtx,
+                            Builder.makeZeroVal(Call.getResultType()));
     C.addTransition(State);
   }
 
@@ -1497,8 +1496,8 @@ void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE,
       State = SameBuffer;
       State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
       if (State) {
-        State =
-            SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
+        State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
+                                     Builder.makeZeroVal(Call.getResultType()));
         C.addTransition(State);
       }
       return;
@@ -1511,33 +1510,35 @@ void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE,
   State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
   if (State) {
     // The return value is the comparison result, which we don't know.
- SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); - State = State->BindExpr(CE, LCtx, CmpV); + SVal CmpV = Builder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, + C.blockCount()); + State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV); C.addTransition(State); } } } void CStringChecker::evalstrLength(CheckerContext &C, - const CallExpr *CE) const { + const CallEvent &Call) const { // size_t strlen(const char *s); - evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); + evalstrLengthCommon(C, Call, /* IsStrnlen = */ false); } void CStringChecker::evalstrnLength(CheckerContext &C, - const CallExpr *CE) const { + const CallEvent &Call) const { // size_t strnlen(const char *s, size_t maxlen); - evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); + evalstrLengthCommon(C, Call, /* IsStrnlen = */ true); } -void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, +void CStringChecker::evalstrLengthCommon(CheckerContext &C, + const CallEvent &Call, bool IsStrnlen) const { CurrentFunctionDescription = "string length function"; ProgramStateRef state = C.getState(); const LocationContext *LCtx = C.getLocationContext(); if (IsStrnlen) { - const Expr *maxlenExpr = CE->getArg(1); + const Expr *maxlenExpr = Call.getArgExpr(1); SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); ProgramStateRef stateZeroSize, stateNonZeroSize; @@ -1547,8 +1548,8 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, // If the size can be zero, the result will be 0 in that case, and we don't // have to check the string itself. if (stateZeroSize) { - SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); - stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); + SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType()); + stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero); C.addTransition(stateZeroSize); } @@ -1561,7 +1562,7 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, } // Check that the string argument is non-null. - AnyArgExpr Arg = {CE->getArg(0), 0}; + AnyArgExpr Arg = {Call.getArgExpr(0), 0}; SVal ArgVal = state->getSVal(Arg.Expression, LCtx); state = checkNonNull(C, state, Arg, ArgVal); @@ -1584,7 +1585,7 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, // It's a little unfortunate to be getting this again, // but it's not that expensive... - const Expr *maxlenExpr = CE->getArg(1); + const Expr *maxlenExpr = Call.getArgExpr(1); SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); @@ -1613,8 +1614,8 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, // no guarantee the full string length will actually be returned. // All we know is the return value is the min of the string length // and the limit. This is better than nothing. - result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, - C.blockCount()); + result = C.getSValBuilder().conjureSymbolVal( + nullptr, Call.getOriginExpr(), LCtx, C.blockCount()); NonLoc resultNL = result.castAs<NonLoc>(); if (strLengthNL) { @@ -1637,78 +1638,85 @@ void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, // If we don't know the length of the string, conjure a return // value, so it can be used in constraints, at least. 
if (result.isUnknown()) { - result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, - C.blockCount()); + result = C.getSValBuilder().conjureSymbolVal( + nullptr, Call.getOriginExpr(), LCtx, C.blockCount()); } } // Bind the return value. assert(!result.isUnknown() && "Should have conjured a value by now"); - state = state->BindExpr(CE, LCtx, result); + state = state->BindExpr(Call.getOriginExpr(), LCtx, result); C.addTransition(state); } -void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrcpy(CheckerContext &C, + const CallEvent &Call) const { // char *strcpy(char *restrict dst, const char *restrict src); - evalStrcpyCommon(C, CE, + evalStrcpyCommon(C, Call, /* ReturnEnd = */ false, /* IsBounded = */ false, /* appendK = */ ConcatFnKind::none); } -void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrncpy(CheckerContext &C, + const CallEvent &Call) const { // char *strncpy(char *restrict dst, const char *restrict src, size_t n); - evalStrcpyCommon(C, CE, + evalStrcpyCommon(C, Call, /* ReturnEnd = */ false, /* IsBounded = */ true, /* appendK = */ ConcatFnKind::none); } -void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStpcpy(CheckerContext &C, + const CallEvent &Call) const { // char *stpcpy(char *restrict dst, const char *restrict src); - evalStrcpyCommon(C, CE, + evalStrcpyCommon(C, Call, /* ReturnEnd = */ true, /* IsBounded = */ false, /* appendK = */ ConcatFnKind::none); } -void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrlcpy(CheckerContext &C, + const CallEvent &Call) const { // size_t strlcpy(char *dest, const char *src, size_t size); - evalStrcpyCommon(C, CE, + evalStrcpyCommon(C, Call, /* ReturnEnd = */ true, /* IsBounded = */ true, /* appendK = */ ConcatFnKind::none, /* returnPtr = */ false); } -void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrcat(CheckerContext &C, + const CallEvent &Call) const { // char *strcat(char *restrict s1, const char *restrict s2); - evalStrcpyCommon(C, CE, + evalStrcpyCommon(C, Call, /* ReturnEnd = */ false, /* IsBounded = */ false, /* appendK = */ ConcatFnKind::strcat); } -void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrncat(CheckerContext &C, + const CallEvent &Call) const { // char *strncat(char *restrict s1, const char *restrict s2, size_t n); - evalStrcpyCommon(C, CE, + evalStrcpyCommon(C, Call, /* ReturnEnd = */ false, /* IsBounded = */ true, /* appendK = */ ConcatFnKind::strcat); } -void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrlcat(CheckerContext &C, + const CallEvent &Call) const { // size_t strlcat(char *dst, const char *src, size_t size); // It will append at most size - strlen(dst) - 1 bytes, // NULL-terminating the result. 
- evalStrcpyCommon(C, CE, + evalStrcpyCommon(C, Call, /* ReturnEnd = */ false, /* IsBounded = */ true, /* appendK = */ ConcatFnKind::strlcat, /* returnPtr = */ false); } -void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, +void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, bool ReturnEnd, bool IsBounded, ConcatFnKind appendK, bool returnPtr) const { @@ -1721,14 +1729,14 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, const LocationContext *LCtx = C.getLocationContext(); // Check that the destination is non-null. - DestinationArgExpr Dst = {{CE->getArg(0), 0}}; + DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}}; SVal DstVal = state->getSVal(Dst.Expression, LCtx); state = checkNonNull(C, state, Dst, DstVal); if (!state) return; // Check that the source is non-null. - SourceArgExpr srcExpr = {{CE->getArg(1), 1}}; + SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}}; SVal srcVal = state->getSVal(srcExpr.Expression, LCtx); state = checkNonNull(C, state, srcExpr, srcVal); if (!state) @@ -1763,8 +1771,8 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, {srcExpr.Expression, srcExpr.ArgumentIndex}}; state = CheckOverlap( C, state, - (IsBounded ? SizeArgExpr{{CE->getArg(2), 2}} : SrcExprAsSizeDummy), Dst, - srcExpr); + (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy), + Dst, srcExpr); if (!state) return; @@ -1772,7 +1780,7 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, // If the function is strncpy, strncat, etc... it is bounded. if (IsBounded) { // Get the max number of characters to copy. - SizeArgExpr lenExpr = {{CE->getArg(2), 2}}; + SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}}; SVal lenVal = state->getSVal(lenExpr.Expression, LCtx); // Protect against misdeclared strncpy(). @@ -1886,16 +1894,19 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, // If the size is known to be zero, we're done. if (StateZeroSize && !StateNonZeroSize) { if (returnPtr) { - StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal); + StateZeroSize = + StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal); } else { if (appendK == ConcatFnKind::none) { // strlcpy returns strlen(src) - StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength); + StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(), + LCtx, strLength); } else { // strlcat returns strlen(src) + strlen(dst) SVal retSize = svalBuilder.evalBinOp( state, BO_Add, strLength, dstStrLength, sizeTy); - StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize); + StateZeroSize = + StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize); } } C.addTransition(StateZeroSize); @@ -1964,7 +1975,8 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, if (finalStrLength.isUnknown()) { // Try to get a "hypothetical" string length symbol, which we can later // set as a real value if that turns out to be the case. - finalStrLength = getCStringLength(C, state, CE, DstVal, true); + finalStrLength = + getCStringLength(C, state, Call.getOriginExpr(), DstVal, true); assert(!finalStrLength.isUndef()); if (std::optional<NonLoc> finalStrLengthNL = @@ -2094,51 +2106,54 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, // If this is a stpcpy-style copy, but we were unable to check for a buffer // overflow, we still need a result. Conjure a return value. 
if (ReturnEnd && Result.isUnknown()) { - Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); + Result = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, + C.blockCount()); } } // Set the return value. - state = state->BindExpr(CE, LCtx, Result); + state = state->BindExpr(Call.getOriginExpr(), LCtx, Result); C.addTransition(state); } -void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrcmp(CheckerContext &C, + const CallEvent &Call) const { //int strcmp(const char *s1, const char *s2); - evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false); + evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false); } -void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrncmp(CheckerContext &C, + const CallEvent &Call) const { //int strncmp(const char *s1, const char *s2, size_t n); - evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false); + evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false); } void CStringChecker::evalStrcasecmp(CheckerContext &C, - const CallExpr *CE) const { + const CallEvent &Call) const { //int strcasecmp(const char *s1, const char *s2); - evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true); + evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true); } void CStringChecker::evalStrncasecmp(CheckerContext &C, - const CallExpr *CE) const { + const CallEvent &Call) const { //int strncasecmp(const char *s1, const char *s2, size_t n); - evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true); + evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true); } -void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, - bool IsBounded, bool IgnoreCase) const { +void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call, + bool IsBounded, bool IgnoreCase) const { CurrentFunctionDescription = "string comparison function"; ProgramStateRef state = C.getState(); const LocationContext *LCtx = C.getLocationContext(); // Check that the first string is non-null - AnyArgExpr Left = {CE->getArg(0), 0}; + AnyArgExpr Left = {Call.getArgExpr(0), 0}; SVal LeftVal = state->getSVal(Left.Expression, LCtx); state = checkNonNull(C, state, Left, LeftVal); if (!state) return; // Check that the second string is non-null. - AnyArgExpr Right = {CE->getArg(1), 1}; + AnyArgExpr Right = {Call.getArgExpr(1), 1}; SVal RightVal = state->getSVal(Right.Expression, LCtx); state = checkNonNull(C, state, Right, RightVal); if (!state) @@ -2169,8 +2184,9 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, // If the two arguments might be the same buffer, we know the result is 0, // and we only need to check one size. if (StSameBuf) { - StSameBuf = StSameBuf->BindExpr(CE, LCtx, - svalBuilder.makeZeroVal(CE->getType())); + StSameBuf = + StSameBuf->BindExpr(Call.getOriginExpr(), LCtx, + svalBuilder.makeZeroVal(Call.getResultType())); C.addTransition(StSameBuf); // If the two arguments are GUARANTEED to be the same, we're done! 
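
Two mechanical substitutions recur throughout the hunks above and below: expression bindings that used to go through the raw CallExpr now go through Call.getOriginExpr(), and the call's result type comes from Call.getResultType() instead of CE->getType(). A minimal sketch of the rewrite (schematic, not a hunk from this patch):

// Before: bind the return value through the raw CallExpr.
State = State->BindExpr(CE, LCtx, svalBuilder.makeZeroVal(CE->getType()));

// After: reach both the origin expression and the result type
// through the CallEvent abstraction.
State = State->BindExpr(Call.getOriginExpr(), LCtx,
                        svalBuilder.makeZeroVal(Call.getResultType()));

The same substitution applies to conjureSymbolVal(), which now conjures its symbol against Call.getOriginExpr() rather than CE.
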
@@ -2190,8 +2206,8 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, const StringLiteral *RightStrLiteral = getCStringLiteral(C, state, Right.Expression, RightVal); bool canComputeResult = false; - SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, - C.blockCount()); + SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), + LCtx, C.blockCount()); if (LeftStrLiteral && RightStrLiteral) { StringRef LeftStrRef = LeftStrLiteral->getString(); @@ -2199,7 +2215,7 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, if (IsBounded) { // Get the max number of characters to compare. - const Expr *lenExpr = CE->getArg(2); + const Expr *lenExpr = Call.getArgExpr(2); SVal lenVal = state->getSVal(lenExpr, LCtx); // If the length is known, we can get the right substrings. @@ -2231,10 +2247,10 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, // The strcmp function returns an integer greater than, equal to, or less // than zero, [c11, p7.24.4.2]. if (compareRes == 0) { - resultVal = svalBuilder.makeIntVal(compareRes, CE->getType()); + resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType()); } else { - DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType()); + DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType()); // Constrain strcmp's result range based on the result of StringRef's // comparison methods. BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT; @@ -2247,20 +2263,21 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, } } - state = state->BindExpr(CE, LCtx, resultVal); + state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal); // Record this as a possible path. C.addTransition(state); } -void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalStrsep(CheckerContext &C, + const CallEvent &Call) const { // char *strsep(char **stringp, const char *delim); // Verify whether the search string parameter matches the return type. - SourceArgExpr SearchStrPtr = {{CE->getArg(0), 0}}; + SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}}; QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType(); - if (CharPtrTy.isNull() || - CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType()) + if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() != + CharPtrTy.getUnqualifiedType()) return; CurrentFunctionDescription = "strsep()"; @@ -2275,7 +2292,7 @@ void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const { return; // Check that the delimiter string is non-null. - AnyArgExpr DelimStr = {CE->getArg(1), 1}; + AnyArgExpr DelimStr = {Call.getArgExpr(1), 1}; SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx); State = checkNonNull(C, State, DelimStr, DelimStrVal); if (!State) @@ -2295,37 +2312,37 @@ void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const { // Overwrite the search string pointer. The new value is either an address // further along in the same string, or NULL if there are no more tokens. - State = State->bindLoc(*SearchStrLoc, - SVB.conjureSymbolVal(getTag(), - CE, - LCtx, - CharPtrTy, - C.blockCount()), - LCtx); + State = + State->bindLoc(*SearchStrLoc, + SVB.conjureSymbolVal(getTag(), Call.getOriginExpr(), + LCtx, CharPtrTy, C.blockCount()), + LCtx); } else { assert(SearchStrVal.isUnknown()); // Conjure a symbolic value. It's the best we can do. 
- Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); + Result = SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, + C.blockCount()); } // Set the return value, and finish. - State = State->BindExpr(CE, LCtx, Result); + State = State->BindExpr(Call.getOriginExpr(), LCtx, Result); C.addTransition(State); } // These should probably be moved into a C++ standard library checker. -void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const { - evalStdCopyCommon(C, CE); +void CStringChecker::evalStdCopy(CheckerContext &C, + const CallEvent &Call) const { + evalStdCopyCommon(C, Call); } void CStringChecker::evalStdCopyBackward(CheckerContext &C, - const CallExpr *CE) const { - evalStdCopyCommon(C, CE); + const CallEvent &Call) const { + evalStdCopyCommon(C, Call); } void CStringChecker::evalStdCopyCommon(CheckerContext &C, - const CallExpr *CE) const { - if (!CE->getArg(2)->getType()->isPointerType()) + const CallEvent &Call) const { + if (!Call.getArgExpr(2)->getType()->isPointerType()) return; ProgramStateRef State = C.getState(); @@ -2338,7 +2355,7 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C, // _OutputIterator __result) // Invalidate the destination buffer - const Expr *Dst = CE->getArg(2); + const Expr *Dst = Call.getArgExpr(2); SVal DstVal = State->getSVal(Dst, LCtx); // FIXME: As we do not know how many items are copied, we also invalidate the // super region containing the target location. @@ -2347,19 +2364,21 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C, SValBuilder &SVB = C.getSValBuilder(); - SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); - State = State->BindExpr(CE, LCtx, ResultVal); + SVal ResultVal = + SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, C.blockCount()); + State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal); C.addTransition(State); } -void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalMemset(CheckerContext &C, + const CallEvent &Call) const { // void *memset(void *s, int c, size_t n); CurrentFunctionDescription = "memory set function"; - DestinationArgExpr Buffer = {{CE->getArg(0), 0}}; - AnyArgExpr CharE = {CE->getArg(1), 1}; - SizeArgExpr Size = {{CE->getArg(2), 2}}; + DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}}; + AnyArgExpr CharE = {Call.getArgExpr(1), 1}; + SizeArgExpr Size = {{Call.getArgExpr(2), 2}}; ProgramStateRef State = C.getState(); @@ -2377,7 +2396,7 @@ void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const { // If the size is zero, there won't be any actual memory access, so // just bind the return value to the buffer and return. 
if (ZeroSize && !NonZeroSize) { - ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal); + ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal); C.addTransition(ZeroSize); return; } @@ -2399,15 +2418,15 @@ void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const { Size.Expression, C, State)) return; - State = State->BindExpr(CE, LCtx, BufferPtrVal); + State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal); C.addTransition(State); } -void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const { CurrentFunctionDescription = "memory clearance function"; - DestinationArgExpr Buffer = {{CE->getArg(0), 0}}; - SizeArgExpr Size = {{CE->getArg(1), 1}}; + DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}}; + SizeArgExpr Size = {{Call.getArgExpr(1), 1}}; SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy); ProgramStateRef State = C.getState(); @@ -2446,24 +2465,29 @@ void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const { C.addTransition(State); } -void CStringChecker::evalSprintf(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalSprintf(CheckerContext &C, + const CallEvent &Call) const { CurrentFunctionDescription = "'sprintf'"; + const auto *CE = cast<CallExpr>(Call.getOriginExpr()); bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk; - evalSprintfCommon(C, CE, /* IsBounded */ false, IsBI); + evalSprintfCommon(C, Call, /* IsBounded */ false, IsBI); } -void CStringChecker::evalSnprintf(CheckerContext &C, const CallExpr *CE) const { +void CStringChecker::evalSnprintf(CheckerContext &C, + const CallEvent &Call) const { CurrentFunctionDescription = "'snprintf'"; + const auto *CE = cast<CallExpr>(Call.getOriginExpr()); bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk; - evalSprintfCommon(C, CE, /* IsBounded */ true, IsBI); + evalSprintfCommon(C, Call, /* IsBounded */ true, IsBI); } -void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE, +void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call, bool IsBounded, bool IsBuiltin) const { ProgramStateRef State = C.getState(); - DestinationArgExpr Dest = {{CE->getArg(0), 0}}; + const auto *CE = cast<CallExpr>(Call.getOriginExpr()); + DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}}; - const auto NumParams = CE->getCalleeDecl()->getAsFunction()->getNumParams(); + const auto NumParams = Call.parameters().size(); assert(CE->getNumArgs() >= NumParams); const auto AllArguments = @@ -2483,7 +2507,7 @@ void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE, {Source.Expression, Source.ArgumentIndex}}; State = CheckOverlap( C, State, - (IsBounded ? SizeArgExpr{{CE->getArg(1), 1}} : SrcExprAsSizeDummy), + (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy), Dest, Source); if (!State) return; @@ -2536,8 +2560,8 @@ bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { return false; // Check and evaluate the call. - const auto *CE = cast<CallExpr>(Call.getOriginExpr()); - Callback(this, C, CE); + assert(isa<CallExpr>(Call.getOriginExpr())); + Callback(this, C, Call); // If the evaluate call resulted in no change, chain to the next eval call // handler. 
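
Taken together, the CStringChecker changes are one uniform migration: FnCheck and every evalXxx callback now take the abstract CallEvent instead of a raw CallExpr, arguments are fetched with Call.getArgExpr(i), and only the sprintf handlers still cast the origin expression back to a CallExpr where they need getBuiltinCallee() or getNumArgs(). A callback written against the new signature would look roughly like this (evalExample and its logic are illustrative placeholders, not part of the patch):

void CStringChecker::evalExample(CheckerContext &C,
                                 const CallEvent &Call) const {
  // Arguments come from the CallEvent rather than CallExpr::getArg().
  const Expr *ArgE = Call.getArgExpr(0);
  ProgramStateRef State = C.getState();
  const LocationContext *LCtx = C.getLocationContext();
  SVal ArgVal = State->getSVal(ArgE, LCtx);

  // Unknown results are conjured against the origin expression, and the
  // return value is bound to that same expression.
  if (ArgVal.isUnknown()) {
    SVal Result = C.getSValBuilder().conjureSymbolVal(
        nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
    State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
  }
  C.addTransition(State);
}

Dispatch is unchanged except that evalCall now asserts the origin expression is a CallExpr and forwards the CallEvent itself to the callback.
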
diff --git a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp index eb265f4dde68..b4dee1e300e8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp @@ -31,6 +31,7 @@ #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" @@ -65,7 +66,8 @@ public: }; class DeleteWithNonVirtualDtorChecker : public CXXDeleteChecker { - mutable std::unique_ptr<BugType> BT; + const BugType BT{ + this, "Destruction of a polymorphic object with no virtual destructor"}; void checkTypedDeleteExpr(const CXXDeleteExpr *DE, CheckerContext &C, @@ -74,7 +76,8 @@ class DeleteWithNonVirtualDtorChecker : public CXXDeleteChecker { }; class CXXArrayDeleteChecker : public CXXDeleteChecker { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, + "Deleting an array of polymorphic objects is undefined"}; void checkTypedDeleteExpr(const CXXDeleteExpr *DE, CheckerContext &C, @@ -123,17 +126,10 @@ void DeleteWithNonVirtualDtorChecker::checkTypedDeleteExpr( if (!DerivedClass->isDerivedFrom(BaseClass)) return; - if (!BT) - BT.reset(new BugType(this, - "Destruction of a polymorphic object with no " - "virtual destructor", - "Logic error")); - ExplodedNode *N = C.generateNonFatalErrorNode(); if (!N) return; - auto R = - std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, BT.getDescription(), N); // Mark region of problematic base class for later use in the BugVisitor. R->markInteresting(BaseClassRegion); @@ -160,12 +156,6 @@ void CXXArrayDeleteChecker::checkTypedDeleteExpr( if (!DerivedClass->isDerivedFrom(BaseClass)) return; - if (!BT) - BT.reset(new BugType(this, - "Deleting an array of polymorphic objects " - "is undefined", - "Logic error")); - ExplodedNode *N = C.generateNonFatalErrorNode(); if (!N) return; @@ -182,7 +172,7 @@ void CXXArrayDeleteChecker::checkTypedDeleteExpr( << SourceType.getAsString(C.getASTContext().getPrintingPolicy()) << "' is undefined"; - auto R = std::make_unique<PathSensitiveBugReport>(*BT, OS.str(), N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, OS.str(), N); // Mark region of problematic base class for later use in the BugVisitor. 
R->markInteresting(BaseClassRegion); diff --git a/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp index ea74256935ca..f2e1f69c32cf 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp @@ -125,9 +125,8 @@ private: if (!BT) BT.reset(new BugType(OriginalName, desc)); } - bool uninitRefOrPointer(CheckerContext &C, const SVal &V, - SourceRange ArgRange, const Expr *ArgEx, - std::unique_ptr<BugType> &BT, + bool uninitRefOrPointer(CheckerContext &C, SVal V, SourceRange ArgRange, + const Expr *ArgEx, std::unique_ptr<BugType> &BT, const ParmVarDecl *ParamDecl, const char *BD, int ArgumentNumber) const; }; @@ -185,7 +184,7 @@ static void describeUninitializedArgumentInCall(const CallEvent &Call, } bool CallAndMessageChecker::uninitRefOrPointer( - CheckerContext &C, const SVal &V, SourceRange ArgRange, const Expr *ArgEx, + CheckerContext &C, SVal V, SourceRange ArgRange, const Expr *ArgEx, std::unique_ptr<BugType> &BT, const ParmVarDecl *ParamDecl, const char *BD, int ArgumentNumber) const { @@ -263,7 +262,7 @@ public: if (Find(FR)) return true; } else { - const SVal &V = StoreMgr.getBinding(store, loc::MemRegionVal(FR)); + SVal V = StoreMgr.getBinding(store, loc::MemRegionVal(FR)); if (V.isUndef()) return true; } diff --git a/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp index d1d4f3baf6a8..a50772f881f7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp @@ -24,7 +24,7 @@ using namespace ento; namespace { class CastSizeChecker : public Checker< check::PreStmt<CastExpr> > { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Cast region with wrong size."}; public: void checkPreStmt(const CastExpr *CE, CheckerContext &C) const; @@ -131,12 +131,10 @@ void CastSizeChecker::checkPreStmt(const CastExpr *CE,CheckerContext &C) const { return; if (ExplodedNode *errorNode = C.generateErrorNode()) { - if (!BT) - BT.reset(new BugType(this, "Cast region with wrong size.")); constexpr llvm::StringLiteral Msg = "Cast a region whose size is not a multiple of the destination type " "size."; - auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, errorNode); + auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, errorNode); R->addRange(CE->getSourceRange()); C.emitReport(std::move(R)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp index fedc6db3723a..978bc0bb082f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp @@ -99,18 +99,23 @@ class ObjCDeallocChecker check::PointerEscape, check::PreStmt<ReturnStmt>> { - mutable IdentifierInfo *NSObjectII, *SenTestCaseII, *XCTestCaseII, - *Block_releaseII, *CIFilterII; - - mutable Selector DeallocSel, ReleaseSel; - - std::unique_ptr<BugType> MissingReleaseBugType; - std::unique_ptr<BugType> ExtraReleaseBugType; - std::unique_ptr<BugType> MistakenDeallocBugType; + mutable const IdentifierInfo *NSObjectII = nullptr; + mutable const IdentifierInfo *SenTestCaseII = nullptr; + mutable const IdentifierInfo *XCTestCaseII = nullptr; + mutable const IdentifierInfo *Block_releaseII = nullptr; + mutable const IdentifierInfo *CIFilterII = nullptr; + + mutable Selector DeallocSel; + mutable Selector ReleaseSel; + + 
const BugType MissingReleaseBugType{this, "Missing ivar release (leak)", + categories::MemoryRefCount}; + const BugType ExtraReleaseBugType{this, "Extra ivar release", + categories::MemoryRefCount}; + const BugType MistakenDeallocBugType{this, "Mistaken dealloc", + categories::MemoryRefCount}; public: - ObjCDeallocChecker(); - void checkASTDecl(const ObjCImplementationDecl *D, AnalysisManager& Mgr, BugReporter &BR) const; void checkBeginFunction(CheckerContext &Ctx) const; @@ -579,7 +584,7 @@ void ObjCDeallocChecker::diagnoseMissingReleases(CheckerContext &C) const { OS << " by a synthesized property but not released" " before '[super dealloc]'"; - auto BR = std::make_unique<PathSensitiveBugReport>(*MissingReleaseBugType, + auto BR = std::make_unique<PathSensitiveBugReport>(MissingReleaseBugType, OS.str(), ErrNode); C.emitReport(std::move(BR)); } @@ -701,7 +706,7 @@ bool ObjCDeallocChecker::diagnoseExtraRelease(SymbolRef ReleasedValue, OS << " property but was released in 'dealloc'"; } - auto BR = std::make_unique<PathSensitiveBugReport>(*ExtraReleaseBugType, + auto BR = std::make_unique<PathSensitiveBugReport>(ExtraReleaseBugType, OS.str(), ErrNode); BR->addRange(M.getOriginExpr()->getSourceRange()); @@ -743,7 +748,7 @@ bool ObjCDeallocChecker::diagnoseMistakenDealloc(SymbolRef DeallocedValue, OS << "'" << *PropImpl->getPropertyIvarDecl() << "' should be released rather than deallocated"; - auto BR = std::make_unique<PathSensitiveBugReport>(*MistakenDeallocBugType, + auto BR = std::make_unique<PathSensitiveBugReport>(MistakenDeallocBugType, OS.str(), ErrNode); BR->addRange(M.getOriginExpr()->getSourceRange()); @@ -752,23 +757,6 @@ bool ObjCDeallocChecker::diagnoseMistakenDealloc(SymbolRef DeallocedValue, return true; } -ObjCDeallocChecker::ObjCDeallocChecker() - : NSObjectII(nullptr), SenTestCaseII(nullptr), XCTestCaseII(nullptr), - Block_releaseII(nullptr), CIFilterII(nullptr) { - - MissingReleaseBugType.reset( - new BugType(this, "Missing ivar release (leak)", - categories::MemoryRefCount)); - - ExtraReleaseBugType.reset( - new BugType(this, "Extra ivar release", - categories::MemoryRefCount)); - - MistakenDeallocBugType.reset( - new BugType(this, "Mistaken dealloc", - categories::MemoryRefCount)); -} - void ObjCDeallocChecker::initIdentifierInfoAndSelectors( ASTContext &Ctx) const { if (NSObjectII) diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp index afc5e6b48008..ce05d2d3c905 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp @@ -140,8 +140,7 @@ void WalkAST::VisitCallExpr(CallExpr *CE) { if (!II) // if no identifier, not a simple C function return; StringRef Name = II->getName(); - if (Name.starts_with("__builtin_")) - Name = Name.substr(10); + Name.consume_front("__builtin_"); // Set the evaluation function by switching on the callee name. 
FnCheck evalFunction = @@ -763,8 +762,7 @@ void WalkAST::checkDeprecatedOrUnsafeBufferHandling(const CallExpr *CE, enum { DEPR_ONLY = -1, UNKNOWN_CALL = -2 }; StringRef Name = FD->getIdentifier()->getName(); - if (Name.starts_with("__builtin_")) - Name = Name.substr(10); + Name.consume_front("__builtin_"); int ArgIndex = llvm::StringSwitch<int>(Name) diff --git a/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp index 9e11d8d9ecbc..be7be15022d3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp @@ -41,7 +41,7 @@ bool isRootChanged(intptr_t k) { return k == ROOT_CHANGED; } // bug<--foo()-- JAIL_ENTERED<--foo()-- class ChrootChecker : public Checker<eval::Call, check::PreCall> { // This bug refers to possibly break out of a chroot() jail. - mutable std::unique_ptr<BugType> BT_BreakJail; + const BugType BT_BreakJail{this, "Break out of jail"}; const CallDescription Chroot{{"chroot"}, 1}, Chdir{{"chdir"}, 1}; @@ -124,12 +124,10 @@ void ChrootChecker::checkPreCall(const CallEvent &Call, if (k) if (isRootChanged((intptr_t) *k)) if (ExplodedNode *N = C.generateNonFatalErrorNode()) { - if (!BT_BreakJail) - BT_BreakJail.reset(new BugType(this, "Break out of jail")); constexpr llvm::StringLiteral Msg = "No call of chdir(\"/\") immediately after chroot"; C.emitReport( - std::make_unique<PathSensitiveBugReport>(*BT_BreakJail, Msg, N)); + std::make_unique<PathSensitiveBugReport>(BT_BreakJail, Msg, N)); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp index 0e21ea7e90c9..6692a45a09f7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -35,7 +35,8 @@ public: private: mutable CloneDetector Detector; - mutable std::unique_ptr<BugType> BT_Exact, BT_Suspicious; + const BugType BT_Exact{this, "Exact code clone", "Code clone"}; + const BugType BT_Suspicious{this, "Suspicious code clone", "Code clone"}; public: void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr, @@ -107,15 +108,11 @@ static PathDiagnosticLocation makeLocation(const StmtSequence &S, void CloneChecker::reportClones( BugReporter &BR, AnalysisManager &Mgr, std::vector<CloneDetector::CloneGroup> &CloneGroups) const { - - if (!BT_Exact) - BT_Exact.reset(new BugType(this, "Exact code clone", "Code clone")); - for (const CloneDetector::CloneGroup &Group : CloneGroups) { // We group the clones by printing the first as a warning and all others // as a note. auto R = std::make_unique<BasicBugReport>( - *BT_Exact, "Duplicate code detected", makeLocation(Group.front(), Mgr)); + BT_Exact, "Duplicate code detected", makeLocation(Group.front(), Mgr)); R->addRange(Group.front().getSourceRange()); for (unsigned i = 1; i < Group.size(); ++i) @@ -154,10 +151,6 @@ void CloneChecker::reportSuspiciousClones( } } - if (!BT_Suspicious) - BT_Suspicious.reset( - new BugType(this, "Suspicious code clone", "Code clone")); - ASTContext &ACtx = BR.getContext(); SourceManager &SM = ACtx.getSourceManager(); AnalysisDeclContext *ADC = @@ -170,7 +163,7 @@ void CloneChecker::reportSuspiciousClones( // Think how to perform more accurate suggestions? 
auto R = std::make_unique<BasicBugReport>( - *BT_Suspicious, + BT_Suspicious, "Potential copy-paste error; did you really mean to use '" + Pair.FirstCloneInfo.Variable->getNameAsString() + "' here?", PathDiagnosticLocation::createBegin(Pair.FirstCloneInfo.Mention, SM, diff --git a/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp index 8b34b41bab21..eca8d3cc0722 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp @@ -42,7 +42,7 @@ public: void checkPreStmt(const ImplicitCastExpr *Cast, CheckerContext &C) const; private: - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Conversion"}; bool isLossOfPrecision(const ImplicitCastExpr *Cast, QualType DestType, CheckerContext &C) const; @@ -126,11 +126,8 @@ void ConversionChecker::checkPreStmt(const ImplicitCastExpr *Cast, void ConversionChecker::reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C, const char Msg[]) const { - if (!BT) - BT.reset(new BugType(this, "Conversion")); - // Generate a report for this bug. - auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); bugreporter::trackExpressionValue(N, E, *R); C.emitReport(std::move(R)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp index 832bb78c4803..97f769b1c451 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp @@ -28,7 +28,8 @@ namespace { class DebugContainerModeling : public Checker<eval::Call> { - std::unique_ptr<BugType> DebugMsgBugType; + const BugType DebugMsgBugType{this, "Checking analyzer assumptions", "debug", + /*SuppressOnSink=*/true}; template <typename Getter> void analyzerContainerDataField(const CallExpr *CE, CheckerContext &C, @@ -48,19 +49,11 @@ class DebugContainerModeling }; public: - DebugContainerModeling(); - bool evalCall(const CallEvent &Call, CheckerContext &C) const; }; } //namespace -DebugContainerModeling::DebugContainerModeling() { - DebugMsgBugType.reset( - new BugType(this, "Checking analyzer assumptions", "debug", - /*SuppressOnSink=*/true)); -} - bool DebugContainerModeling::evalCall(const CallEvent &Call, CheckerContext &C) const { const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); @@ -137,8 +130,8 @@ ExplodedNode *DebugContainerModeling::reportDebugMsg(llvm::StringRef Msg, return nullptr; auto &BR = C.getBugReporter(); - BR.emitReport(std::make_unique<PathSensitiveBugReport>(*DebugMsgBugType, - Msg, N)); + BR.emitReport( + std::make_unique<PathSensitiveBugReport>(DebugMsgBugType, Msg, N)); return N; } diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp index d05298b42c55..ff479c7b0ac8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp @@ -28,7 +28,8 @@ namespace { class DebugIteratorModeling : public Checker<eval::Call> { - std::unique_ptr<BugType> DebugMsgBugType; + const BugType DebugMsgBugType{this, "Checking analyzer assumptions", "debug", + /*SuppressOnSink=*/true}; template <typename Getter> void analyzerIteratorDataField(const CallExpr *CE, CheckerContext &C, @@ -51,19 +52,11 @@ class DebugIteratorModeling }; public: - DebugIteratorModeling(); - 
bool evalCall(const CallEvent &Call, CheckerContext &C) const; }; } //namespace -DebugIteratorModeling::DebugIteratorModeling() { - DebugMsgBugType.reset( - new BugType(this, "Checking analyzer assumptions", "debug", - /*SuppressOnSink=*/true)); -} - bool DebugIteratorModeling::evalCall(const CallEvent &Call, CheckerContext &C) const { const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); @@ -131,8 +124,8 @@ ExplodedNode *DebugIteratorModeling::reportDebugMsg(llvm::StringRef Msg, return nullptr; auto &BR = C.getBugReporter(); - BR.emitReport(std::make_unique<PathSensitiveBugReport>(*DebugMsgBugType, - Msg, N)); + BR.emitReport( + std::make_unique<PathSensitiveBugReport>(DebugMsgBugType, Msg, N)); return N; } diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp index 5331d9574743..5496f087447f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp @@ -14,6 +14,7 @@ #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Checkers/Taint.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" @@ -25,8 +26,8 @@ using namespace taint; namespace { class DivZeroChecker : public Checker< check::PreStmt<BinaryOperator> > { - mutable std::unique_ptr<BugType> BT; - mutable std::unique_ptr<BugType> TaintBT; + const BugType BT{this, "Division by zero"}; + const BugType TaintBT{this, "Division by zero", categories::TaintedData}; void reportBug(StringRef Msg, ProgramStateRef StateZero, CheckerContext &C) const; void reportTaintBug(StringRef Msg, ProgramStateRef StateZero, @@ -48,10 +49,7 @@ static const Expr *getDenomExpr(const ExplodedNode *N) { void DivZeroChecker::reportBug(StringRef Msg, ProgramStateRef StateZero, CheckerContext &C) const { if (ExplodedNode *N = C.generateErrorNode(StateZero)) { - if (!BT) - BT.reset(new BugType(this, "Division by zero", categories::LogicError)); - - auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); bugreporter::trackExpressionValue(N, getDenomExpr(N), *R); C.emitReport(std::move(R)); } @@ -61,11 +59,7 @@ void DivZeroChecker::reportTaintBug( StringRef Msg, ProgramStateRef StateZero, CheckerContext &C, llvm::ArrayRef<SymbolRef> TaintedSyms) const { if (ExplodedNode *N = C.generateErrorNode(StateZero)) { - if (!TaintBT) - TaintBT.reset( - new BugType(this, "Division by zero", categories::TaintedData)); - - auto R = std::make_unique<PathSensitiveBugReport>(*TaintBT, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(TaintBT, Msg, N); bugreporter::trackExpressionValue(N, getDenomExpr(N), *R); for (auto Sym : TaintedSyms) R->markInteresting(Sym); diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp index dbc930d7d37b..0ad307d3ebd5 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypeChecker.cpp @@ -30,12 +30,7 @@ using namespace ento; namespace { class DynamicTypeChecker : public Checker<check::PostStmt<ImplicitCastExpr>> { - mutable std::unique_ptr<BugType> BT; - void initBugType() const { - if (!BT) - 
BT.reset( - new BugType(this, "Dynamic and static type mismatch", "Type Error")); - } + const BugType BT{this, "Dynamic and static type mismatch", "Type Error"}; class DynamicTypeBugVisitor : public BugReporterVisitor { public: @@ -70,7 +65,6 @@ void DynamicTypeChecker::reportTypeError(QualType DynamicType, const MemRegion *Reg, const Stmt *ReportedNode, CheckerContext &C) const { - initBugType(); SmallString<192> Buf; llvm::raw_svector_ostream OS(Buf); OS << "Object has a dynamic type '"; @@ -81,7 +75,7 @@ void DynamicTypeChecker::reportTypeError(QualType DynamicType, llvm::Twine()); OS << "'"; auto R = std::make_unique<PathSensitiveBugReport>( - *BT, OS.str(), C.generateNonFatalErrorNode()); + BT, OS.str(), C.generateNonFatalErrorNode()); R->markInteresting(Reg); R->addVisitor(std::make_unique<DynamicTypeBugVisitor>(Reg)); R->addRange(ReportedNode->getSourceRange()); diff --git a/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp index 7c51673422a0..0fa20428c1b5 100644 --- a/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp @@ -60,7 +60,7 @@ public: // Being conservative, it does not warn if there is slight possibility the // value can be matching. class EnumCastOutOfRangeChecker : public Checker<check::PreStmt<CastExpr>> { - mutable std::unique_ptr<BugType> EnumValueCastOutOfRange; + const BugType EnumValueCastOutOfRange{this, "Enum cast out of range"}; void reportWarning(CheckerContext &C, const CastExpr *CE, const EnumDecl *E) const; @@ -85,10 +85,6 @@ void EnumCastOutOfRangeChecker::reportWarning(CheckerContext &C, const EnumDecl *E) const { assert(E && "valid EnumDecl* is expected"); if (const ExplodedNode *N = C.generateNonFatalErrorNode()) { - if (!EnumValueCastOutOfRange) - EnumValueCastOutOfRange.reset( - new BugType(this, "Enum cast out of range")); - std::string ValueStr = "", NameStr = "the enum"; // Try to add details to the message: @@ -105,7 +101,7 @@ void EnumCastOutOfRangeChecker::reportWarning(CheckerContext &C, "not in the valid range of values for {1}", ValueStr, NameStr); - auto BR = std::make_unique<PathSensitiveBugReport>(*EnumValueCastOutOfRange, + auto BR = std::make_unique<PathSensitiveBugReport>(EnumValueCastOutOfRange, Msg, N); bugreporter::trackExpressionValue(N, CE->getSubExpr(), *BR); BR->addNote("enum declared here", diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp index 2c7bacac33a6..3096999e9fd1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -25,7 +25,7 @@ using namespace ento; namespace { class ExprInspectionChecker : public Checker<eval::Call, check::DeadSymbols, check::EndAnalysis> { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Checking analyzer assumptions", "debug"}; // These stats are per-analysis, not per-branch, hence they shouldn't // stay inside the program state. 
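
The surrounding checker files all apply the same simplification: the lazily allocated mutable std::unique_ptr<BugType> members, reset() on first report, become const BugType members constructed once with the checker, and PathSensitiveBugReport takes the BugType by reference instead of dereferencing the pointer. Schematically (SomeChecker and its strings are placeholders; the pattern is taken from the hunks above):

class SomeChecker : public Checker<check::PreStmt<BinaryOperator>> {
  // Previously: mutable std::unique_ptr<BugType> BT; plus a lazy
  //   if (!BT) BT.reset(new BugType(this, ...));
  // before each report. Now the BugType lives for the checker's lifetime.
  const BugType BT{this, "Some bug", "Logic error"};

public:
  void reportBug(ExplodedNode *N, CheckerContext &C) const {
    // The report constructor now takes `const BugType &` directly.
    auto R = std::make_unique<PathSensitiveBugReport>(BT, "message", N);
    C.emitReport(std::move(R));
  }
};

This also removes several now-empty checker constructors (ObjCDeallocChecker, DebugContainerModeling, DebugIteratorModeling, InvalidatedIteratorChecker).
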
@@ -176,11 +176,7 @@ ExprInspectionChecker::reportBug(llvm::StringRef Msg, BugReporter &BR, std::optional<SVal> ExprVal) const { if (!N) return nullptr; - - if (!BT) - BT.reset(new BugType(this, "Checking analyzer assumptions", "debug")); - - auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); if (ExprVal) { R->markInteresting(*ExprVal); } diff --git a/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp index 2ee201b64008..7aefcdc6d358 100644 --- a/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp @@ -24,7 +24,7 @@ using namespace ento; namespace { class FixedAddressChecker : public Checker< check::PreStmt<BinaryOperator> > { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Use fixed address"}; public: void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; @@ -50,12 +50,10 @@ void FixedAddressChecker::checkPreStmt(const BinaryOperator *B, if (ExplodedNode *N = C.generateNonFatalErrorNode()) { // FIXME: improve grammar in the following strings: - if (!BT) - BT.reset(new BugType(this, "Use fixed address")); constexpr llvm::StringLiteral Msg = "Using a fixed address is not portable because that address will " "probably not be valid in all environments or platforms."; - auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); R->addRange(B->getRHS()->getSourceRange()); C.emitReport(std::move(R)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/InvalidatedIteratorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/InvalidatedIteratorChecker.cpp index 6955ba11a28f..3f5856a3efbe 100644 --- a/clang/lib/StaticAnalyzer/Checkers/InvalidatedIteratorChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/InvalidatedIteratorChecker.cpp @@ -31,14 +31,14 @@ class InvalidatedIteratorChecker check::PreStmt<ArraySubscriptExpr>, check::PreStmt<MemberExpr>> { - std::unique_ptr<BugType> InvalidatedBugType; + const BugType InvalidatedBugType{this, "Iterator invalidated", + "Misuse of STL APIs"}; - void verifyAccess(CheckerContext &C, const SVal &Val) const; - void reportBug(const StringRef &Message, const SVal &Val, - CheckerContext &C, ExplodedNode *ErrNode) const; -public: - InvalidatedIteratorChecker(); + void verifyAccess(CheckerContext &C, SVal Val) const; + void reportBug(StringRef Message, SVal Val, CheckerContext &C, + ExplodedNode *ErrNode) const; +public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPreStmt(const UnaryOperator *UO, CheckerContext &C) const; void checkPreStmt(const BinaryOperator *BO, CheckerContext &C) const; @@ -49,11 +49,6 @@ public: } //namespace -InvalidatedIteratorChecker::InvalidatedIteratorChecker() { - InvalidatedBugType.reset( - new BugType(this, "Iterator invalidated", "Misuse of STL APIs")); -} - void InvalidatedIteratorChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { // Check for access of invalidated position @@ -114,7 +109,8 @@ void InvalidatedIteratorChecker::checkPreStmt(const MemberExpr *ME, verifyAccess(C, BaseVal); } -void InvalidatedIteratorChecker::verifyAccess(CheckerContext &C, const SVal &Val) const { +void InvalidatedIteratorChecker::verifyAccess(CheckerContext &C, + SVal Val) const { auto State = C.getState(); const auto *Pos = getIteratorPosition(State, Val); if (Pos && !Pos->isValid()) { @@ -126,11 
+122,11 @@ void InvalidatedIteratorChecker::verifyAccess(CheckerContext &C, const SVal &Val } } -void InvalidatedIteratorChecker::reportBug(const StringRef &Message, - const SVal &Val, CheckerContext &C, +void InvalidatedIteratorChecker::reportBug(StringRef Message, SVal Val, + CheckerContext &C, ExplodedNode *ErrNode) const { - auto R = std::make_unique<PathSensitiveBugReport>(*InvalidatedBugType, - Message, ErrNode); + auto R = std::make_unique<PathSensitiveBugReport>(InvalidatedBugType, Message, + ErrNode); R->markInteresting(Val); C.emitReport(std::move(R)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp index 90047a2899a7..e8d35aac2efd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp @@ -181,8 +181,7 @@ const ContainerData *getContainerData(ProgramStateRef State, return State->get<ContainerMap>(Cont); } -const IteratorPosition *getIteratorPosition(ProgramStateRef State, - const SVal &Val) { +const IteratorPosition *getIteratorPosition(ProgramStateRef State, SVal Val) { if (auto Reg = Val.getAsRegion()) { Reg = Reg->getMostDerivedObjectRegion(); return State->get<IteratorRegionMap>(Reg); @@ -194,7 +193,7 @@ const IteratorPosition *getIteratorPosition(ProgramStateRef State, return nullptr; } -ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val, +ProgramStateRef setIteratorPosition(ProgramStateRef State, SVal Val, const IteratorPosition &Pos) { if (auto Reg = Val.getAsRegion()) { Reg = Reg->getMostDerivedObjectRegion(); @@ -207,8 +206,8 @@ ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val, return nullptr; } -ProgramStateRef createIteratorPosition(ProgramStateRef State, const SVal &Val, - const MemRegion *Cont, const Stmt* S, +ProgramStateRef createIteratorPosition(ProgramStateRef State, SVal Val, + const MemRegion *Cont, const Stmt *S, const LocationContext *LCtx, unsigned blockCount) { auto &StateMgr = State->getStateManager(); @@ -221,9 +220,8 @@ ProgramStateRef createIteratorPosition(ProgramStateRef State, const SVal &Val, IteratorPosition::getPosition(Cont, Sym)); } -ProgramStateRef advancePosition(ProgramStateRef State, const SVal &Iter, - OverloadedOperatorKind Op, - const SVal &Distance) { +ProgramStateRef advancePosition(ProgramStateRef State, SVal Iter, + OverloadedOperatorKind Op, SVal Distance) { const auto *Pos = getIteratorPosition(State, Iter); if (!Pos) return nullptr; diff --git a/clang/lib/StaticAnalyzer/Checkers/Iterator.h b/clang/lib/StaticAnalyzer/Checkers/Iterator.h index 353daf0bed08..46de8ea01d77 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Iterator.h +++ b/clang/lib/StaticAnalyzer/Checkers/Iterator.h @@ -161,18 +161,15 @@ bool isRandomIncrOrDecrOperator(OverloadedOperatorKind OK); bool isRandomIncrOrDecrOperator(BinaryOperatorKind OK); const ContainerData *getContainerData(ProgramStateRef State, const MemRegion *Cont); -const IteratorPosition *getIteratorPosition(ProgramStateRef State, - const SVal &Val); -ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val, +const IteratorPosition *getIteratorPosition(ProgramStateRef State, SVal Val); +ProgramStateRef setIteratorPosition(ProgramStateRef State, SVal Val, const IteratorPosition &Pos); -ProgramStateRef createIteratorPosition(ProgramStateRef State, const SVal &Val, - const MemRegion *Cont, const Stmt* S, +ProgramStateRef createIteratorPosition(ProgramStateRef State, SVal Val, + const MemRegion *Cont, const Stmt *S, 
diff --git a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp
index 90047a2899a7..e8d35aac2efd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp
@@ -181,8 +181,7 @@ const ContainerData *getContainerData(ProgramStateRef State,
   return State->get<ContainerMap>(Cont);
 }
 
-const IteratorPosition *getIteratorPosition(ProgramStateRef State,
-                                            const SVal &Val) {
+const IteratorPosition *getIteratorPosition(ProgramStateRef State, SVal Val) {
   if (auto Reg = Val.getAsRegion()) {
     Reg = Reg->getMostDerivedObjectRegion();
     return State->get<IteratorRegionMap>(Reg);
@@ -194,7 +193,7 @@ const IteratorPosition *getIteratorPosition(ProgramStateRef State,
   return nullptr;
 }
 
-ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val,
+ProgramStateRef setIteratorPosition(ProgramStateRef State, SVal Val,
                                     const IteratorPosition &Pos) {
   if (auto Reg = Val.getAsRegion()) {
     Reg = Reg->getMostDerivedObjectRegion();
@@ -207,8 +206,8 @@ ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val,
   return nullptr;
 }
 
-ProgramStateRef createIteratorPosition(ProgramStateRef State, const SVal &Val,
-                                       const MemRegion *Cont, const Stmt* S,
+ProgramStateRef createIteratorPosition(ProgramStateRef State, SVal Val,
+                                       const MemRegion *Cont, const Stmt *S,
                                        const LocationContext *LCtx,
                                        unsigned blockCount) {
   auto &StateMgr = State->getStateManager();
@@ -221,9 +220,8 @@ ProgramStateRef createIteratorPosition(ProgramStateRef State, const SVal &Val,
                             IteratorPosition::getPosition(Cont, Sym));
 }
 
-ProgramStateRef advancePosition(ProgramStateRef State, const SVal &Iter,
-                                OverloadedOperatorKind Op,
-                                const SVal &Distance) {
+ProgramStateRef advancePosition(ProgramStateRef State, SVal Iter,
+                                OverloadedOperatorKind Op, SVal Distance) {
   const auto *Pos = getIteratorPosition(State, Iter);
   if (!Pos)
     return nullptr;
diff --git a/clang/lib/StaticAnalyzer/Checkers/Iterator.h b/clang/lib/StaticAnalyzer/Checkers/Iterator.h
index 353daf0bed08..46de8ea01d77 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Iterator.h
+++ b/clang/lib/StaticAnalyzer/Checkers/Iterator.h
@@ -161,18 +161,15 @@ bool isRandomIncrOrDecrOperator(OverloadedOperatorKind OK);
 bool isRandomIncrOrDecrOperator(BinaryOperatorKind OK);
 const ContainerData *getContainerData(ProgramStateRef State,
                                       const MemRegion *Cont);
-const IteratorPosition *getIteratorPosition(ProgramStateRef State,
-                                            const SVal &Val);
-ProgramStateRef setIteratorPosition(ProgramStateRef State, const SVal &Val,
+const IteratorPosition *getIteratorPosition(ProgramStateRef State, SVal Val);
+ProgramStateRef setIteratorPosition(ProgramStateRef State, SVal Val,
                                     const IteratorPosition &Pos);
-ProgramStateRef createIteratorPosition(ProgramStateRef State, const SVal &Val,
-                                       const MemRegion *Cont, const Stmt* S,
+ProgramStateRef createIteratorPosition(ProgramStateRef State, SVal Val,
+                                       const MemRegion *Cont, const Stmt *S,
                                        const LocationContext *LCtx,
                                        unsigned blockCount);
-ProgramStateRef advancePosition(ProgramStateRef State,
-                                const SVal &Iter,
-                                OverloadedOperatorKind Op,
-                                const SVal &Distance);
+ProgramStateRef advancePosition(ProgramStateRef State, SVal Iter,
+                                OverloadedOperatorKind Op, SVal Distance);
 ProgramStateRef assumeNoOverflow(ProgramStateRef State, SymbolRef Sym,
                                  long Scale);
 bool compare(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2,
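`SVal` gets the same by-value treatment as `StringRef` throughout Iterator.h and Iterator.cpp. It is a small tagged value (a kind plus a pointer-sized payload), so copying it is as cheap as passing a reference. If I read SVals.h correctly it is trivially copyable; the assertion below is an assumption documenting the rationale, not code from this diff:

```cpp
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
#include <type_traits>

// SVal wraps a small tag plus a pointer-sized payload. Because it is
// trivially copyable, a by-value parameter is no more expensive than a
// const reference and avoids one level of indirection at every use.
static_assert(std::is_trivially_copyable<clang::ento::SVal>::value,
              "SVal is expected to be cheap to copy");
```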
diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
index 2d51a000ece3..a95e811c2a41 100644
--- a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
@@ -100,18 +100,17 @@ class IteratorModeling
                          const AdvanceFn *Handler) const;
 
   void handleComparison(CheckerContext &C, const Expr *CE, SVal RetVal,
-                        const SVal &LVal, const SVal &RVal,
-                        OverloadedOperatorKind Op) const;
+                        SVal LVal, SVal RVal, OverloadedOperatorKind Op) const;
   void processComparison(CheckerContext &C, ProgramStateRef State,
-                         SymbolRef Sym1, SymbolRef Sym2, const SVal &RetVal,
+                         SymbolRef Sym1, SymbolRef Sym2, SVal RetVal,
                          OverloadedOperatorKind Op) const;
-  void handleIncrement(CheckerContext &C, const SVal &RetVal, const SVal &Iter,
+  void handleIncrement(CheckerContext &C, SVal RetVal, SVal Iter,
                        bool Postfix) const;
-  void handleDecrement(CheckerContext &C, const SVal &RetVal, const SVal &Iter,
+  void handleDecrement(CheckerContext &C, SVal RetVal, SVal Iter,
                        bool Postfix) const;
   void handleRandomIncrOrDecr(CheckerContext &C, const Expr *CE,
-                              OverloadedOperatorKind Op, const SVal &RetVal,
-                              const SVal &Iterator, const SVal &Amount) const;
+                              OverloadedOperatorKind Op, SVal RetVal,
+                              SVal Iterator, SVal Amount) const;
   void handlePtrIncrOrDecr(CheckerContext &C, const Expr *Iterator,
                            OverloadedOperatorKind OK, SVal Offset) const;
   void handleAdvance(CheckerContext &C, const Expr *CE, SVal RetVal, SVal Iter,
@@ -120,7 +119,7 @@ class IteratorModeling
                      SVal Amount) const;
   void handleNext(CheckerContext &C, const Expr *CE, SVal RetVal, SVal Iter,
                   SVal Amount) const;
-  void assignToContainer(CheckerContext &C, const Expr *CE, const SVal &RetVal,
+  void assignToContainer(CheckerContext &C, const Expr *CE, SVal RetVal,
                          const MemRegion *Cont) const;
   bool noChangeInAdvance(CheckerContext &C, SVal Iter, const Expr *CE) const;
   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
@@ -160,7 +159,7 @@ public:
 
 bool isSimpleComparisonOperator(OverloadedOperatorKind OK);
 bool isSimpleComparisonOperator(BinaryOperatorKind OK);
-ProgramStateRef removeIteratorPosition(ProgramStateRef State, const SVal &Val);
+ProgramStateRef removeIteratorPosition(ProgramStateRef State, SVal Val);
 ProgramStateRef relateSymbols(ProgramStateRef State, SymbolRef Sym1,
                               SymbolRef Sym2, bool Equal);
 bool isBoundThroughLazyCompoundVal(const Environment &Env,
@@ -283,7 +282,7 @@ void IteratorModeling::checkPostStmt(const BinaryOperator *BO,
   // The non-iterator side must have an integral or enumeration type.
   if (!AmountExpr->getType()->isIntegralOrEnumerationType())
     return;
-  const SVal &AmountVal = IsIterOnLHS ? RVal : LVal;
+  SVal AmountVal = IsIterOnLHS ? RVal : LVal;
   handlePtrIncrOrDecr(C, IterExpr, BinaryOperator::getOverloadedOperator(OK),
                       AmountVal);
 }
@@ -388,8 +387,8 @@ IteratorModeling::handleOverloadedOperator(CheckerContext &C,
       const bool IsIterFirst = FirstType->isStructureOrClassType();
       const SVal FirstArg = Call.getArgSVal(0);
       const SVal SecondArg = Call.getArgSVal(1);
-      const SVal &Iterator = IsIterFirst ? FirstArg : SecondArg;
-      const SVal &Amount = IsIterFirst ? SecondArg : FirstArg;
+      SVal Iterator = IsIterFirst ? FirstArg : SecondArg;
+      SVal Amount = IsIterFirst ? SecondArg : FirstArg;
 
       handleRandomIncrOrDecr(C, OrigExpr, Op, Call.getReturnValue(),
                              Iterator, Amount);
@@ -444,14 +443,13 @@ IteratorModeling::handleAdvanceLikeFunction(CheckerContext &C,
 }
 
 void IteratorModeling::handleComparison(CheckerContext &C, const Expr *CE,
-                                        SVal RetVal, const SVal &LVal,
-                                        const SVal &RVal,
-                                        OverloadedOperatorKind Op) const {
+                                        SVal RetVal, SVal LVal, SVal RVal,
+                                        OverloadedOperatorKind Op) const {
   // Record the operands and the operator of the comparison for the next
   // evalAssume, if the result is a symbolic expression. If it is a concrete
   // value (only one branch is possible), then transfer the state between
   // the operands according to the operator and the result
-  auto State = C.getState(); 
+  auto State = C.getState();
   const auto *LPos = getIteratorPosition(State, LVal);
   const auto *RPos = getIteratorPosition(State, RVal);
   const MemRegion *Cont = nullptr;
@@ -504,7 +502,7 @@ void IteratorModeling::handleComparison(CheckerContext &C, const Expr *CE,
 
 void IteratorModeling::processComparison(CheckerContext &C,
                                          ProgramStateRef State, SymbolRef Sym1,
-                                         SymbolRef Sym2, const SVal &RetVal,
+                                         SymbolRef Sym2, SVal RetVal,
                                          OverloadedOperatorKind Op) const {
   if (const auto TruthVal = RetVal.getAs<nonloc::ConcreteInt>()) {
     if ((State = relateSymbols(State, Sym1, Sym2,
@@ -532,8 +530,8 @@ void IteratorModeling::processComparison(CheckerContext &C,
   }
 }
 
-void IteratorModeling::handleIncrement(CheckerContext &C, const SVal &RetVal,
-                                       const SVal &Iter, bool Postfix) const {
+void IteratorModeling::handleIncrement(CheckerContext &C, SVal RetVal,
+                                       SVal Iter, bool Postfix) const {
   // Increment the symbolic expressions which represents the position of the
   // iterator
   auto State = C.getState();
@@ -558,8 +556,8 @@ void IteratorModeling::handleIncrement(CheckerContext &C, const SVal &RetVal,
   C.addTransition(State);
 }
 
-void IteratorModeling::handleDecrement(CheckerContext &C, const SVal &RetVal,
-                                       const SVal &Iter, bool Postfix) const {
+void IteratorModeling::handleDecrement(CheckerContext &C, SVal RetVal,
+                                       SVal Iter, bool Postfix) const {
   // Decrement the symbolic expressions which represents the position of the
   // iterator
   auto State = C.getState();
@@ -586,9 +584,8 @@ void IteratorModeling::handleDecrement(CheckerContext &C, const SVal &RetVal,
 
 void IteratorModeling::handleRandomIncrOrDecr(CheckerContext &C,
                                               const Expr *CE,
                                               OverloadedOperatorKind Op,
-                                              const SVal &RetVal,
-                                              const SVal &Iterator,
-                                              const SVal &Amount) const {
+                                              SVal RetVal, SVal Iterator,
+                                              SVal Amount) const {
   // Increment or decrement the symbolic expressions which represents the
   // position of the iterator
   auto State = C.getState();
@@ -684,7 +681,7 @@ void IteratorModeling::handleNext(CheckerContext &C, const Expr *CE,
 }
 
 void IteratorModeling::assignToContainer(CheckerContext &C, const Expr *CE,
-                                         const SVal &RetVal,
+                                         SVal RetVal,
                                          const MemRegion *Cont) const {
   Cont = Cont->getMostDerivedObjectRegion();
 
@@ -772,7 +769,7 @@ bool isSimpleComparisonOperator(BinaryOperatorKind OK) {
   return OK == BO_EQ || OK == BO_NE;
 }
 
-ProgramStateRef removeIteratorPosition(ProgramStateRef State, const SVal &Val) {
+ProgramStateRef removeIteratorPosition(ProgramStateRef State, SVal Val) {
   if (auto Reg = Val.getAsRegion()) {
     Reg = Reg->getMostDerivedObjectRegion();
     return State->remove<IteratorRegionMap>(Reg);
diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
index 7740c3d4da1e..c8828219dd73 100644
--- a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
@@ -32,7 +32,8 @@ class IteratorRangeChecker
                             check::PreStmt<ArraySubscriptExpr>,
                             check::PreStmt<MemberExpr>> {
 
-  std::unique_ptr<BugType> OutOfRangeBugType;
+  const BugType OutOfRangeBugType{this, "Iterator out of range",
+                                  "Misuse of STL APIs"};
 
   void verifyDereference(CheckerContext &C, SVal Val) const;
   void verifyIncrement(CheckerContext &C, SVal Iter) const;
@@ -42,12 +43,10 @@ class IteratorRangeChecker
   void verifyAdvance(CheckerContext &C, SVal LHS, SVal RHS) const;
   void verifyPrev(CheckerContext &C, SVal LHS, SVal RHS) const;
   void verifyNext(CheckerContext &C, SVal LHS, SVal RHS) const;
-  void reportBug(const StringRef &Message, SVal Val, CheckerContext &C,
+  void reportBug(StringRef Message, SVal Val, CheckerContext &C,
                  ExplodedNode *ErrNode) const;
 
 public:
-  IteratorRangeChecker();
-
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPreStmt(const UnaryOperator *UO, CheckerContext &C) const;
   void checkPreStmt(const BinaryOperator *BO, CheckerContext &C) const;
@@ -67,15 +66,10 @@ public:
 bool isPastTheEnd(ProgramStateRef State, const IteratorPosition &Pos);
 bool isAheadOfRange(ProgramStateRef State, const IteratorPosition &Pos);
 bool isBehindPastTheEnd(ProgramStateRef State, const IteratorPosition &Pos);
-bool isZero(ProgramStateRef State, const NonLoc &Val);
+bool isZero(ProgramStateRef State, NonLoc Val);
 
 } //namespace
 
-IteratorRangeChecker::IteratorRangeChecker() {
-  OutOfRangeBugType.reset(
-      new BugType(this, "Iterator out of range", "Misuse of STL APIs"));
-}
-
 void IteratorRangeChecker::checkPreCall(const CallEvent &Call,
                                         CheckerContext &C) const {
   // Check for out of range access
@@ -275,10 +269,10 @@ void IteratorRangeChecker::verifyNext(CheckerContext &C, SVal LHS,
   verifyRandomIncrOrDecr(C, OO_Plus, LHS, RHS);
 }
 
-void IteratorRangeChecker::reportBug(const StringRef &Message, SVal Val,
+void IteratorRangeChecker::reportBug(StringRef Message, SVal Val,
                                      CheckerContext &C,
                                      ExplodedNode *ErrNode) const {
-  auto R = std::make_unique<PathSensitiveBugReport>(*OutOfRangeBugType, Message,
+  auto R = std::make_unique<PathSensitiveBugReport>(OutOfRangeBugType, Message,
                                                     ErrNode);
 
   const auto *Pos = getIteratorPosition(C.getState(), Val);
@@ -295,7 +289,7 @@ bool isLess(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2);
 bool isGreater(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2);
 bool isEqual(ProgramStateRef State, SymbolRef Sym1, SymbolRef Sym2);
 
-bool isZero(ProgramStateRef State, const NonLoc &Val) {
+bool isZero(ProgramStateRef State, NonLoc Val) {
   auto &BVF = State->getBasicVals();
   return compare(State, Val,
                  nonloc::ConcreteInt(BVF.getValue(llvm::APSInt::get(0))),
diff --git a/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
index 70f911fc66ab..de9efe17d220 100644
--- a/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
@@ -62,7 +62,8 @@ class NonLocalizedStringChecker
                                      check::PostObjCMessage,
                                      check::PostStmt<ObjCStringLiteral>> {
 
-  mutable std::unique_ptr<BugType> BT;
+  const BugType BT{this, "Unlocalizable string",
+                   "Localizability Issue (Apple)"};
 
   // Methods that require a localized string
   mutable llvm::DenseMap<const IdentifierInfo *,
@@ -89,8 +90,6 @@ class NonLocalizedStringChecker
                       Selector S) const;
 
 public:
-  NonLocalizedStringChecker();
-
   // When this parameter is set to true, the checker assumes all
   // methods that return NSStrings are unlocalized. Thus, more false
   // positives will be reported.
@@ -108,11 +107,6 @@ public:
 REGISTER_MAP_WITH_PROGRAMSTATE(LocalizedMemMap, const MemRegion *,
                                LocalizedState)
 
-NonLocalizedStringChecker::NonLocalizedStringChecker() {
-  BT.reset(new BugType(this, "Unlocalizable string",
-                       "Localizability Issue (Apple)"));
-}
-
 namespace {
 class NonLocalizedStringBRVisitor final : public BugReporterVisitor {
 
@@ -764,7 +758,7 @@ void NonLocalizedStringChecker::reportLocalizationError(
 
   // Generate the bug report.
   auto R = std::make_unique<PathSensitiveBugReport>(
-      *BT, "User-facing text should use localized string macro", ErrNode);
+      BT, "User-facing text should use localized string macro", ErrNode);
   if (argumentNumber) {
     R->addRange(M.getArgExpr(argumentNumber - 1)->getSourceRange());
   } else {
diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp
index bbf2ddec5762..3e374e6c240e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.cpp
@@ -31,7 +31,7 @@ void MPIBugReporter::reportDoubleNonblocking(
       RequestRegion->getDescriptiveName() + ". ";
 
   auto Report = std::make_unique<PathSensitiveBugReport>(
-      *DoubleNonblockingBugType, ErrorText, ExplNode);
+      DoubleNonblockingBugType, ErrorText, ExplNode);
 
   Report->addRange(MPICallEvent.getSourceRange());
   SourceRange Range = RequestRegion->sourceRange();
@@ -53,7 +53,7 @@ void MPIBugReporter::reportMissingWait(
   std::string ErrorText{"Request " + RequestRegion->getDescriptiveName() +
                         " has no matching wait. "};
 
-  auto Report = std::make_unique<PathSensitiveBugReport>(*MissingWaitBugType,
+  auto Report = std::make_unique<PathSensitiveBugReport>(MissingWaitBugType,
                                                          ErrorText, ExplNode);
 
   SourceRange Range = RequestRegion->sourceRange();
@@ -73,7 +73,7 @@ void MPIBugReporter::reportUnmatchedWait(
   std::string ErrorText{"Request " + RequestRegion->getDescriptiveName() +
                         " has no matching nonblocking call. "};
 
-  auto Report = std::make_unique<PathSensitiveBugReport>(*UnmatchedWaitBugType,
+  auto Report = std::make_unique<PathSensitiveBugReport>(UnmatchedWaitBugType,
                                                          ErrorText, ExplNode);
 
   Report->addRange(CE.getSourceRange());
diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h
index 9871da026b04..0222a2120b34 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h
+++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h
@@ -17,6 +17,7 @@
 
 #include "MPITypes.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace clang {
 namespace ento {
@@ -24,12 +25,10 @@ namespace mpi {
 
 class MPIBugReporter {
 public:
-  MPIBugReporter(const CheckerBase &CB) {
-    UnmatchedWaitBugType.reset(new BugType(&CB, "Unmatched wait", MPIError));
-    DoubleNonblockingBugType.reset(
-        new BugType(&CB, "Double nonblocking", MPIError));
-    MissingWaitBugType.reset(new BugType(&CB, "Missing wait", MPIError));
-  }
+  MPIBugReporter(const CheckerBase &CB)
+      : UnmatchedWaitBugType(&CB, "Unmatched wait", MPIError),
+        MissingWaitBugType(&CB, "Missing wait", MPIError),
+        DoubleNonblockingBugType(&CB, "Double nonblocking", MPIError) {}
 
   /// Report duplicate request use by nonblocking calls without intermediate
   /// wait.
@@ -68,12 +67,10 @@ public:
                            BugReporter &BReporter) const;
 
 private:
-  const std::string MPIError = "MPI Error";
-
-  // path-sensitive bug types
-  std::unique_ptr<BugType> UnmatchedWaitBugType;
-  std::unique_ptr<BugType> MissingWaitBugType;
-  std::unique_ptr<BugType> DoubleNonblockingBugType;
+  const llvm::StringLiteral MPIError = "MPI Error";
+  const BugType UnmatchedWaitBugType;
+  const BugType MissingWaitBugType;
+  const BugType DoubleNonblockingBugType;
 
   /// Bug visitor class to find the node where the request region was previously
   /// used in order to include it into the BugReport path.
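One subtlety in the MPIBugReporter.h change above: the constructor now uses a member initializer list, and C++ initializes members in declaration order, not in the order they appear in the list. The diff keeps the list order (UnmatchedWait, MissingWait, DoubleNonblocking) consistent with the new declaration order, which avoids a `-Wreorder` warning; since `MPIError` is declared before the three BugType members, it is also safe to pass to their constructors. A standalone illustration of the trap, with a hypothetical class:

```cpp
#include <iostream>

struct Reorder {
  int A;
  int B;
  // Members are initialized in declaration order (A, then B), regardless of
  // the order written below. Initializing A from B therefore reads an
  // indeterminate value; compilers flag the mismatch with -Wreorder.
  Reorder(int Seed) : B(Seed), A(B + 1) {} // bug: A is initialized first
};

int main() {
  Reorder R(41);
  std::cout << R.A << ' ' << R.B << '\n'; // A is garbage, B is 41
}
```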
"}; - auto Report = std::make_unique<PathSensitiveBugReport>(*UnmatchedWaitBugType, + auto Report = std::make_unique<PathSensitiveBugReport>(UnmatchedWaitBugType, ErrorText, ExplNode); Report->addRange(CE.getSourceRange()); diff --git a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h index 9871da026b04..0222a2120b34 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h +++ b/clang/lib/StaticAnalyzer/Checkers/MPI-Checker/MPIBugReporter.h @@ -17,6 +17,7 @@ #include "MPITypes.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "llvm/ADT/StringRef.h" namespace clang { namespace ento { @@ -24,12 +25,10 @@ namespace mpi { class MPIBugReporter { public: - MPIBugReporter(const CheckerBase &CB) { - UnmatchedWaitBugType.reset(new BugType(&CB, "Unmatched wait", MPIError)); - DoubleNonblockingBugType.reset( - new BugType(&CB, "Double nonblocking", MPIError)); - MissingWaitBugType.reset(new BugType(&CB, "Missing wait", MPIError)); - } + MPIBugReporter(const CheckerBase &CB) + : UnmatchedWaitBugType(&CB, "Unmatched wait", MPIError), + MissingWaitBugType(&CB, "Missing wait", MPIError), + DoubleNonblockingBugType(&CB, "Double nonblocking", MPIError) {} /// Report duplicate request use by nonblocking calls without intermediate /// wait. @@ -68,12 +67,10 @@ public: BugReporter &BReporter) const; private: - const std::string MPIError = "MPI Error"; - - // path-sensitive bug types - std::unique_ptr<BugType> UnmatchedWaitBugType; - std::unique_ptr<BugType> MissingWaitBugType; - std::unique_ptr<BugType> DoubleNonblockingBugType; + const llvm::StringLiteral MPIError = "MPI Error"; + const BugType UnmatchedWaitBugType; + const BugType MissingWaitBugType; + const BugType DoubleNonblockingBugType; /// Bug visitor class to find the node where the request region was previously /// used in order to include it into the BugReport path. diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp index 771c0a5fbb8d..12bf12a0b232 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp @@ -33,7 +33,8 @@ class MacOSKeychainAPIChecker : public Checker<check::PreStmt<CallExpr>, check::DeadSymbols, check::PointerEscape, eval::Assume> { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Improper use of SecKeychain API", + categories::AppleAPIMisuse}; public: /// AllocationState is a part of the checker specific state together with the @@ -101,12 +102,6 @@ private: /// function. 
static unsigned getTrackedFunctionIndex(StringRef Name, bool IsAllocator); - inline void initBugType() const { - if (!BT) - BT.reset(new BugType(this, "Improper use of SecKeychain API", - "API Misuse (Apple)")); - } - void generateDeallocatorMismatchReport(const AllocationPair &AP, const Expr *ArgExpr, CheckerContext &C) const; @@ -232,7 +227,6 @@ void MacOSKeychainAPIChecker:: if (!N) return; - initBugType(); SmallString<80> sbuf; llvm::raw_svector_ostream os(sbuf); unsigned int PDeallocIdx = @@ -240,7 +234,7 @@ void MacOSKeychainAPIChecker:: os << "Deallocator doesn't match the allocator: '" << FunctionsToTrack[PDeallocIdx].Name << "' should be used."; - auto Report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + auto Report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N); Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(AP.first)); Report->addRange(ArgExpr->getSourceRange()); markInteresting(Report.get(), AP); @@ -276,7 +270,6 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE, ExplodedNode *N = C.generateNonFatalErrorNode(State); if (!N) return; - initBugType(); SmallString<128> sbuf; llvm::raw_svector_ostream os(sbuf); unsigned int DIdx = FunctionsToTrack[AS->AllocatorIdx].DeallocatorIdx; @@ -284,8 +277,7 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE, << "the allocator: missing a call to '" << FunctionsToTrack[DIdx].Name << "'."; - auto Report = - std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + auto Report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N); Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(V)); Report->addRange(ArgExpr->getSourceRange()); Report->markInteresting(AS->Region); @@ -338,9 +330,8 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE, ExplodedNode *N = C.generateNonFatalErrorNode(State); if (!N) return; - initBugType(); auto Report = std::make_unique<PathSensitiveBugReport>( - *BT, "Trying to free data which has not been allocated.", N); + BT, "Trying to free data which has not been allocated.", N); Report->addRange(ArgExpr->getSourceRange()); if (AS) Report->markInteresting(AS->Region); @@ -474,7 +465,6 @@ std::unique_ptr<PathSensitiveBugReport> MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport( const AllocationPair &AP, ExplodedNode *N, CheckerContext &C) const { const ADFunctionInfo &FI = FunctionsToTrack[AP.second->AllocatorIdx]; - initBugType(); SmallString<70> sbuf; llvm::raw_svector_ostream os(sbuf); os << "Allocated data is not released: missing a call to '" @@ -493,7 +483,7 @@ MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport( AllocNode->getLocationContext()); auto Report = std::make_unique<PathSensitiveBugReport>( - *BT, os.str(), N, LocUsedForUniqueing, + BT, os.str(), N, LocUsedForUniqueing, AllocNode->getLocationContext()->getDecl()); Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(AP.first)); diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp index 04e7f8dec8d7..754b16764296 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp @@ -18,6 +18,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include 
"clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" @@ -31,7 +32,8 @@ using namespace ento; namespace { class MacOSXAPIChecker : public Checker< check::PreStmt<CallExpr> > { - mutable std::unique_ptr<BugType> BT_dispatchOnce; + const BugType BT_dispatchOnce{this, "Improper use of 'dispatch_once'", + categories::AppleAPIMisuse}; static const ObjCIvarRegion *getParentIvarRegion(const MemRegion *R); @@ -136,12 +138,8 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE, if (!N) return; - if (!BT_dispatchOnce) - BT_dispatchOnce.reset(new BugType(this, "Improper use of 'dispatch_once'", - "API Misuse (Apple)")); - auto report = - std::make_unique<PathSensitiveBugReport>(*BT_dispatchOnce, os.str(), N); + std::make_unique<PathSensitiveBugReport>(BT_dispatchOnce, os.str(), N); report->addRange(CE->getArg(0)->getSourceRange()); C.emitReport(std::move(report)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/MismatchedIteratorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MismatchedIteratorChecker.cpp index 2020dc7cc791..82a622831817 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MismatchedIteratorChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MismatchedIteratorChecker.cpp @@ -30,22 +30,18 @@ namespace { class MismatchedIteratorChecker : public Checker<check::PreCall, check::PreStmt<BinaryOperator>> { - std::unique_ptr<BugType> MismatchedBugType; - - void verifyMatch(CheckerContext &C, const SVal &Iter, - const MemRegion *Cont) const; - void verifyMatch(CheckerContext &C, const SVal &Iter1, - const SVal &Iter2) const; - void reportBug(const StringRef &Message, const SVal &Val1, - const SVal &Val2, CheckerContext &C, - ExplodedNode *ErrNode) const; - void reportBug(const StringRef &Message, const SVal &Val, - const MemRegion *Reg, CheckerContext &C, + const BugType MismatchedBugType{this, "Iterator(s) mismatched", + "Misuse of STL APIs", + /*SuppressOnSink=*/true}; + + void verifyMatch(CheckerContext &C, SVal Iter, const MemRegion *Cont) const; + void verifyMatch(CheckerContext &C, SVal Iter1, SVal Iter2) const; + void reportBug(StringRef Message, SVal Val1, SVal Val2, CheckerContext &C, ExplodedNode *ErrNode) const; + void reportBug(StringRef Message, SVal Val, const MemRegion *Reg, + CheckerContext &C, ExplodedNode *ErrNode) const; public: - MismatchedIteratorChecker(); - void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPreStmt(const BinaryOperator *BO, CheckerContext &C) const; @@ -53,12 +49,6 @@ public: } // namespace -MismatchedIteratorChecker::MismatchedIteratorChecker() { - MismatchedBugType.reset( - new BugType(this, "Iterator(s) mismatched", "Misuse of STL APIs", - /*SuppressOnSink=*/true)); -} - void MismatchedIteratorChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { // Check for iterator mismatches @@ -202,7 +192,7 @@ void MismatchedIteratorChecker::checkPreStmt(const BinaryOperator *BO, verifyMatch(C, LVal, RVal); } -void MismatchedIteratorChecker::verifyMatch(CheckerContext &C, const SVal &Iter, +void MismatchedIteratorChecker::verifyMatch(CheckerContext &C, SVal Iter, const MemRegion *Cont) const { // Verify match between a container and the container of an iterator Cont = Cont->getMostDerivedObjectRegion(); @@ -238,9 +228,8 @@ void MismatchedIteratorChecker::verifyMatch(CheckerContext &C, const SVal &Iter, } } -void MismatchedIteratorChecker::verifyMatch(CheckerContext &C, - const SVal &Iter1, - const SVal &Iter2) const { +void 
MismatchedIteratorChecker::verifyMatch(CheckerContext &C, SVal Iter1, + SVal Iter2) const { // Verify match between the containers of two iterators auto State = C.getState(); const auto *Pos1 = getIteratorPosition(State, Iter1); @@ -277,23 +266,21 @@ void MismatchedIteratorChecker::verifyMatch(CheckerContext &C, } } -void MismatchedIteratorChecker::reportBug(const StringRef &Message, - const SVal &Val1, - const SVal &Val2, - CheckerContext &C, +void MismatchedIteratorChecker::reportBug(StringRef Message, SVal Val1, + SVal Val2, CheckerContext &C, ExplodedNode *ErrNode) const { - auto R = std::make_unique<PathSensitiveBugReport>(*MismatchedBugType, Message, + auto R = std::make_unique<PathSensitiveBugReport>(MismatchedBugType, Message, ErrNode); R->markInteresting(Val1); R->markInteresting(Val2); C.emitReport(std::move(R)); } -void MismatchedIteratorChecker::reportBug(const StringRef &Message, - const SVal &Val, const MemRegion *Reg, +void MismatchedIteratorChecker::reportBug(StringRef Message, SVal Val, + const MemRegion *Reg, CheckerContext &C, ExplodedNode *ErrNode) const { - auto R = std::make_unique<PathSensitiveBugReport>(*MismatchedBugType, Message, + auto R = std::make_unique<PathSensitiveBugReport>(MismatchedBugType, Message, ErrNode); R->markInteresting(Val); R->markInteresting(Reg); diff --git a/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp index 8fc44e78be6f..2e31c16e457c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp @@ -15,6 +15,7 @@ #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" @@ -31,7 +32,9 @@ class MmapWriteExecChecker : public Checker<check::PreCall> { static int ProtWrite; static int ProtExec; static int ProtRead; - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "W^X check fails, Write Exec prot flags set", + "Security"}; + public: MmapWriteExecChecker() : MmapFn({"mmap"}, 6), MprotectFn({"mprotect"}, 3) {} void checkPreCall(const CallEvent &Call, CheckerContext &C) const; @@ -62,17 +65,16 @@ void MmapWriteExecChecker::checkPreCall(const CallEvent &Call, return; if ((Prot & (ProtWrite | ProtExec)) == (ProtWrite | ProtExec)) { - if (!BT) - BT.reset(new BugType(this, "W^X check fails, Write Exec prot flags set", "Security")); - ExplodedNode *N = C.generateNonFatalErrorNode(); if (!N) return; auto Report = std::make_unique<PathSensitiveBugReport>( - *BT, "Both PROT_WRITE and PROT_EXEC flags are set. This can " - "lead to exploitable memory regions, which could be overwritten " - "with malicious code", N); + BT, + "Both PROT_WRITE and PROT_EXEC flags are set. 
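The MmapWriteExecChecker hunks reformat the report but keep the underlying W^X test: a mapping is flagged only when both PROT_WRITE and PROT_EXEC are requested. The mask-and-compare idiom it relies on is the standard way to test that all bits of interest are set, as this sketch outside the analyzer shows (plain POSIX constants; `hasAllFlags` is a hypothetical helper):

```cpp
#include <sys/mman.h>
#include <iostream>

// True iff every flag in Mask is present in Prot. `Prot & Mask` clears all
// other bits, so comparing against Mask itself checks "all set", whereas
// `(Prot & Mask) != 0` would only check "any set".
static bool hasAllFlags(int Prot, int Mask) {
  return (Prot & Mask) == Mask;
}

int main() {
  int Mask = PROT_WRITE | PROT_EXEC;
  std::cout << hasAllFlags(PROT_READ | PROT_WRITE, Mask) << '\n'; // 0
  std::cout << hasAllFlags(PROT_WRITE | PROT_EXEC, Mask) << '\n'; // 1
}
```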
This can " + "lead to exploitable memory regions, which could be overwritten " + "with malicious code", + N); Report->addRange(Call.getArgSourceRange(2)); C.emitReport(std::move(Report)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp index bb01a3b77617..0648084a7d39 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp @@ -31,7 +31,8 @@ using namespace ento; namespace { class NSAutoreleasePoolChecker : public Checker<check::PreObjCMessage> { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Use -drain instead of -release", + "API Upgrade (Apple)"}; mutable Selector releaseS; public: @@ -57,10 +58,6 @@ void NSAutoreleasePoolChecker::checkPreObjCMessage(const ObjCMethodCall &msg, if (msg.getSelector() != releaseS) return; - if (!BT) - BT.reset(new BugType(this, "Use -drain instead of -release", - "API Upgrade (Apple)")); - ExplodedNode *N = C.generateNonFatalErrorNode(); if (!N) { assert(0); @@ -68,7 +65,7 @@ void NSAutoreleasePoolChecker::checkPreObjCMessage(const ObjCMethodCall &msg, } auto Report = std::make_unique<PathSensitiveBugReport>( - *BT, + BT, "Use -drain instead of -release when using NSAutoreleasePool and " "garbage collection", N); diff --git a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp index 44b69ef31911..a9002ee7c966 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp @@ -18,6 +18,7 @@ #include "clang/Analysis/AnyCall.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" @@ -31,8 +32,9 @@ namespace { class NonNullParamChecker : public Checker<check::PreCall, check::BeginFunction, EventDispatcher<ImplicitNullDerefEvent>> { - mutable std::unique_ptr<BugType> BTAttrNonNull; - mutable std::unique_ptr<BugType> BTNullRefArg; + const BugType BTAttrNonNull{ + this, "Argument with 'nonnull' attribute passed null", "API"}; + const BugType BTNullRefArg{this, "Dereference of null pointer"}; public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; @@ -278,13 +280,6 @@ std::unique_ptr<PathSensitiveBugReport> NonNullParamChecker::genReportNullAttrNonNull(const ExplodedNode *ErrorNode, const Expr *ArgE, unsigned IdxOfArg) const { - // Lazily allocate the BugType object if it hasn't already been - // created. Ownership is transferred to the BugReporter object once - // the BugReport is passed to 'EmitWarning'. 
- if (!BTAttrNonNull) - BTAttrNonNull.reset(new BugType( - this, "Argument with 'nonnull' attribute passed null", "API")); - llvm::SmallString<256> SBuf; llvm::raw_svector_ostream OS(SBuf); OS << "Null pointer passed to " @@ -292,7 +287,7 @@ NonNullParamChecker::genReportNullAttrNonNull(const ExplodedNode *ErrorNode, << " parameter expecting 'nonnull'"; auto R = - std::make_unique<PathSensitiveBugReport>(*BTAttrNonNull, SBuf, ErrorNode); + std::make_unique<PathSensitiveBugReport>(BTAttrNonNull, SBuf, ErrorNode); if (ArgE) bugreporter::trackExpressionValue(ErrorNode, ArgE, *R); @@ -302,11 +297,8 @@ NonNullParamChecker::genReportNullAttrNonNull(const ExplodedNode *ErrorNode, std::unique_ptr<PathSensitiveBugReport> NonNullParamChecker::genReportReferenceToNullPointer( const ExplodedNode *ErrorNode, const Expr *ArgE) const { - if (!BTNullRefArg) - BTNullRefArg.reset(new BugType(this, "Dereference of null pointer")); - auto R = std::make_unique<PathSensitiveBugReport>( - *BTNullRefArg, "Forming reference to null pointer", ErrorNode); + BTNullRefArg, "Forming reference to null pointer", ErrorNode); if (ArgE) { const Expr *ArgEDeref = bugreporter::getDerefExpr(ArgE); if (!ArgEDeref) @@ -314,7 +306,6 @@ NonNullParamChecker::genReportReferenceToNullPointer( bugreporter::trackExpressionValue(ErrorNode, ArgEDeref, *R); } return R; - } void ento::registerNonNullParamChecker(CheckerManager &mgr) { diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp index 7906b787cd53..552c222a251a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp @@ -25,8 +25,10 @@ using namespace ento; namespace { class ObjCAtSyncChecker : public Checker< check::PreStmt<ObjCAtSynchronizedStmt> > { - mutable std::unique_ptr<BugType> BT_null; - mutable std::unique_ptr<BugType> BT_undef; + const BugType BT_null{this, "Nil value used as mutex for @synchronized() " + "(no synchronization will occur)"}; + const BugType BT_undef{this, "Uninitialized value used as mutex " + "for @synchronized"}; public: void checkPreStmt(const ObjCAtSynchronizedStmt *S, CheckerContext &C) const; @@ -43,11 +45,8 @@ void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S, // Uninitialized value used for the mutex? if (isa<UndefinedVal>(V)) { if (ExplodedNode *N = C.generateErrorNode()) { - if (!BT_undef) - BT_undef.reset(new BugType(this, "Uninitialized value used as mutex " - "for @synchronized")); auto report = std::make_unique<PathSensitiveBugReport>( - *BT_undef, BT_undef->getDescription(), N); + BT_undef, BT_undef.getDescription(), N); bugreporter::trackExpressionValue(N, Ex, *report); C.emitReport(std::move(report)); } @@ -66,12 +65,8 @@ void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S, // Generate an error node. This isn't a sink since // a null mutex just means no synchronization occurs. 
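The new members come in three shapes across these files. `BTNullRefArg` above (and the two members in ObjCAtSyncChecker just below) pass only a name; if I read the BugType constructor correctly, the category parameter then defaults to `categories::LogicError`. The longer forms spell out a category, or additionally set `/*SuppressOnSink=*/true`. A sketch collecting the three variants used in this diff (`DemoChecker` is hypothetical; the default-category claim is an assumption):

```cpp
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"

using namespace clang;
using namespace ento;

namespace {
class DemoChecker : public Checker<check::PreCall> {
  // Name only: the category is assumed to default to "Logic error".
  const BugType Simple{this, "Dereference of null pointer"};

  // Explicit category string, as in the LocalizationChecker hunk.
  const BugType WithCategory{this, "Unlocalizable string",
                             "Localizability Issue (Apple)"};

  // SuppressOnSink: leak-style reports are dropped on paths that reach a
  // sink (e.g. a noreturn call), as with SimpleStreamChecker's LeakBugType.
  const BugType Leak{this, "Resource Leak", "Unix Stream API Error",
                     /*SuppressOnSink=*/true};

public:
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const {}
};
} // namespace
```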
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
index 7906b787cd53..552c222a251a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
@@ -25,8 +25,10 @@ using namespace ento;
 namespace {
 class ObjCAtSyncChecker
     : public Checker< check::PreStmt<ObjCAtSynchronizedStmt> > {
-  mutable std::unique_ptr<BugType> BT_null;
-  mutable std::unique_ptr<BugType> BT_undef;
+  const BugType BT_null{this, "Nil value used as mutex for @synchronized() "
+                              "(no synchronization will occur)"};
+  const BugType BT_undef{this, "Uninitialized value used as mutex "
+                               "for @synchronized"};
 
 public:
   void checkPreStmt(const ObjCAtSynchronizedStmt *S, CheckerContext &C) const;
@@ -43,11 +45,8 @@ void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S,
   // Uninitialized value used for the mutex?
   if (isa<UndefinedVal>(V)) {
     if (ExplodedNode *N = C.generateErrorNode()) {
-      if (!BT_undef)
-        BT_undef.reset(new BugType(this, "Uninitialized value used as mutex "
-                                         "for @synchronized"));
       auto report = std::make_unique<PathSensitiveBugReport>(
-          *BT_undef, BT_undef->getDescription(), N);
+          BT_undef, BT_undef.getDescription(), N);
       bugreporter::trackExpressionValue(N, Ex, *report);
       C.emitReport(std::move(report));
     }
@@ -66,12 +65,8 @@ void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S,
     // Generate an error node. This isn't a sink since
     // a null mutex just means no synchronization occurs.
     if (ExplodedNode *N = C.generateNonFatalErrorNode(nullState)) {
-      if (!BT_null)
-        BT_null.reset(
-            new BugType(this, "Nil value used as mutex for @synchronized() "
-                              "(no synchronization will occur)"));
       auto report = std::make_unique<PathSensitiveBugReport>(
-          *BT_null, BT_null->getDescription(), N);
+          BT_null, BT_null.getDescription(), N);
       bugreporter::trackExpressionValue(N, Ex, *report);
 
       C.emitReport(std::move(report));
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
index 0244a7a3ebff..28e88245ca95 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
@@ -30,12 +30,7 @@ namespace {
 class ObjCContainersChecker : public Checker< check::PreStmt<CallExpr>,
                                              check::PostStmt<CallExpr>,
                                              check::PointerEscape> {
-  mutable std::unique_ptr<BugType> BT;
-  inline void initBugType() const {
-    if (!BT)
-      BT.reset(new BugType(this, "CFArray API",
-                           categories::CoreFoundationObjectiveC));
-  }
+  const BugType BT{this, "CFArray API", categories::CoreFoundationObjectiveC};
 
   inline SymbolRef getArraySym(const Expr *E, CheckerContext &C) const {
     SVal ArrayRef = C.getSVal(E);
@@ -140,9 +135,9 @@ void ObjCContainersChecker::checkPreStmt(const CallExpr *CE,
     ExplodedNode *N = C.generateErrorNode(StOutBound);
     if (!N)
       return;
-    initBugType();
+
     auto R = std::make_unique<PathSensitiveBugReport>(
-        *BT, "Index is out of bounds", N);
+        BT, "Index is out of bounds", N);
     R->addRange(IdxExpr->getSourceRange());
     bugreporter::trackExpressionValue(N, IdxExpr, *R,
                                       {bugreporter::TrackingKind::Thorough,
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
index d88d6a94a30f..217c46451f80 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
@@ -61,13 +61,13 @@ class ObjCSelfInitChecker : public Checker<  check::PostObjCMessage,
                                              check::PostCall,
                                              check::Location,
                                              check::Bind > {
-  mutable std::unique_ptr<BugType> BT;
+  const BugType BT{this, "Missing \"self = [(super or self) init...]\"",
+                   categories::CoreFoundationObjectiveC};
 
   void checkForInvalidSelf(const Expr *E, CheckerContext &C,
                            const char *errorStr) const;
 
 public:
-  ObjCSelfInitChecker() {}
   void checkPostObjCMessage(const ObjCMethodCall &Msg, CheckerContext &C) const;
   void checkPostStmt(const ObjCIvarRefExpr *E, CheckerContext &C) const;
   void checkPreStmt(const ReturnStmt *S, CheckerContext &C) const;
@@ -157,10 +157,7 @@ void ObjCSelfInitChecker::checkForInvalidSelf(const Expr *E, CheckerContext &C,
   if (!N)
     return;
 
-  if (!BT)
-    BT.reset(new BugType(this, "Missing \"self = [(super or self) init...]\"",
-                         categories::CoreFoundationObjectiveC));
-  C.emitReport(std::make_unique<PathSensitiveBugReport>(*BT, errorStr, N));
+  C.emitReport(std::make_unique<PathSensitiveBugReport>(BT, errorStr, N));
 }
 
 void ObjCSelfInitChecker::checkPostObjCMessage(const ObjCMethodCall &Msg,
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
index 3547b7bb61a2..eb40711812e1 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCSuperDeallocChecker.cpp
@@ -26,18 +26,19 @@ namespace {
 class ObjCSuperDeallocChecker
     : public Checker<check::PostObjCMessage, check::PreObjCMessage,
                      check::PreCall, check::Location> {
-
-  mutable IdentifierInfo *IIdealloc, *IINSObject;
+  mutable IdentifierInfo *IIdealloc = nullptr;
+  mutable IdentifierInfo *IINSObject = nullptr;
   mutable Selector SELdealloc;
 
-  std::unique_ptr<BugType> DoubleSuperDeallocBugType;
+  const BugType DoubleSuperDeallocBugType{
+      this, "[super dealloc] should not be called more than once",
+      categories::CoreFoundationObjectiveC};
 
   void initIdentifierInfoAndSelectors(ASTContext &Ctx) const;
 
   bool isSuperDeallocMessage(const ObjCMethodCall &M) const;
 
 public:
-  ObjCSuperDeallocChecker();
   void checkPostObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
   void checkPreObjCMessage(const ObjCMethodCall &M, CheckerContext &C) const;
 
@@ -188,7 +189,7 @@ void ObjCSuperDeallocChecker::reportUseAfterDealloc(SymbolRef Sym,
     Desc = "Use of 'self' after it has been deallocated";
 
   // Generate the report.
-  auto BR = std::make_unique<PathSensitiveBugReport>(*DoubleSuperDeallocBugType,
+  auto BR = std::make_unique<PathSensitiveBugReport>(DoubleSuperDeallocBugType,
                                                      Desc, ErrNode);
   BR->addRange(S->getSourceRange());
   BR->addVisitor(std::make_unique<SuperDeallocBRVisitor>(Sym));
@@ -213,14 +214,6 @@ void ObjCSuperDeallocChecker::diagnoseCallArguments(const CallEvent &CE,
   }
 }
 
-ObjCSuperDeallocChecker::ObjCSuperDeallocChecker()
-    : IIdealloc(nullptr), IINSObject(nullptr) {
-
-  DoubleSuperDeallocBugType.reset(
-      new BugType(this, "[super dealloc] should not be called more than once",
-                  categories::CoreFoundationObjectiveC));
-}
-
 void
 ObjCSuperDeallocChecker::initIdentifierInfoAndSelectors(ASTContext &Ctx) const {
   if (IIdealloc)
diff --git a/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
index bd6e1ec3a8fc..eee9449f3180 100644
--- a/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
@@ -32,7 +32,7 @@ using namespace ento;
 namespace {
 class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
 private:
-  mutable std::unique_ptr<BugType> PaddingBug;
+  const BugType PaddingBug{this, "Excessive Padding", "Performance"};
   mutable BugReporter *BR;
 
 public:
@@ -310,10 +310,6 @@ public:
   void reportRecord(
       const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
       const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
-    if (!PaddingBug)
-      PaddingBug =
-          std::make_unique<BugType>(this, "Excessive Padding", "Performance");
-
     SmallString<100> Buf;
     llvm::raw_svector_ostream Os(Buf);
     Os << "Excessive padding in '";
@@ -341,8 +337,7 @@ public:
 
     PathDiagnosticLocation CELoc =
         PathDiagnosticLocation::create(RD, BR->getSourceManager());
-    auto Report =
-        std::make_unique<BasicBugReport>(*PaddingBug, Os.str(), CELoc);
+    auto Report = std::make_unique<BasicBugReport>(PaddingBug, Os.str(), CELoc);
     Report->setDeclWithIssue(RD);
     Report->addRange(RD->getSourceRange());
     BR->emitReport(std::move(Report));
arithmetic"}; mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions; public: @@ -168,12 +168,10 @@ void PointerArithChecker::reportPointerArithMisuse(const Expr *E, if (!IsPolymorphic) return; if (ExplodedNode *N = C.generateNonFatalErrorNode()) { - if (!BT_polyArray) - BT_polyArray.reset(new BugType(this, "Dangerous pointer arithmetic")); constexpr llvm::StringLiteral Msg = "Pointer arithmetic on a pointer to base class is dangerous " "because derived and base class may have different size."; - auto R = std::make_unique<PathSensitiveBugReport>(*BT_polyArray, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(BT_polyArray, Msg, N); R->addRange(E->getSourceRange()); R->markInteresting(ArrayRegion); C.emitReport(std::move(R)); @@ -190,12 +188,10 @@ void PointerArithChecker::reportPointerArithMisuse(const Expr *E, return; if (ExplodedNode *N = C.generateNonFatalErrorNode()) { - if (!BT_pointerArith) - BT_pointerArith.reset(new BugType(this, "Dangerous pointer arithmetic")); constexpr llvm::StringLiteral Msg = "Pointer arithmetic on non-array variables relies on memory layout, " "which is dangerous."; - auto R = std::make_unique<PathSensitiveBugReport>(*BT_pointerArith, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(BT_pointerArith, Msg, N); R->addRange(SR); R->markInteresting(Region); C.emitReport(std::move(R)); diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp index 96d38eef3c03..2438cf30b39b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp @@ -25,7 +25,7 @@ using namespace ento; namespace { class PointerSubChecker : public Checker< check::PreStmt<BinaryOperator> > { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Pointer subtraction"}; public: void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const; @@ -59,12 +59,10 @@ void PointerSubChecker::checkPreStmt(const BinaryOperator *B, return; if (ExplodedNode *N = C.generateNonFatalErrorNode()) { - if (!BT) - BT.reset(new BugType(this, "Pointer subtraction")); constexpr llvm::StringLiteral Msg = "Subtraction of two pointers that do not point to the same memory " "chunk may cause incorrect result."; - auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); R->addRange(B->getSourceRange()); C.emitReport(std::move(R)); } diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp index 11dca1ff8831..09d82ebabd4c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp @@ -26,7 +26,9 @@ using namespace ento; namespace { class ReturnPointerRangeChecker : public Checker< check::PreStmt<ReturnStmt> > { - mutable std::unique_ptr<BugType> BT; + // FIXME: This bug correspond to CWE-466. Eventually we should have bug + // types explicitly reference such exploit categories (when applicable). + const BugType BT{this, "Buffer overflow"}; public: void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const; @@ -76,16 +78,12 @@ void ReturnPointerRangeChecker::checkPreStmt(const ReturnStmt *RS, if (!N) return; - // FIXME: This bug correspond to CWE-466. Eventually we should have bug - // types explicitly reference such exploit categories (when applicable). 
- if (!BT) - BT.reset(new BugType(this, "Buffer overflow")); constexpr llvm::StringLiteral Msg = "Returned pointer value points outside the original object " "(potential buffer overflow)"; // Generate a report for this bug. - auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); + auto Report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); Report->addRange(RetE->getSourceRange()); const auto ConcreteElementCount = ElementCount.getAs<nonloc::ConcreteInt>(); diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp index 78cd0100bea4..efffbf2ee755 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp @@ -24,8 +24,8 @@ using namespace ento; namespace { class ReturnUndefChecker : public Checker< check::PreStmt<ReturnStmt> > { - mutable std::unique_ptr<BugType> BT_Undef; - mutable std::unique_ptr<BugType> BT_NullReference; + const BugType BT_Undef{this, "Garbage return value"}; + const BugType BT_NullReference{this, "Returning null reference"}; void emitUndef(CheckerContext &C, const Expr *RetE) const; void checkReference(CheckerContext &C, const Expr *RetE, @@ -77,7 +77,7 @@ void ReturnUndefChecker::checkPreStmt(const ReturnStmt *RS, } } -static void emitBug(CheckerContext &C, BugType &BT, StringRef Msg, +static void emitBug(CheckerContext &C, const BugType &BT, StringRef Msg, const Expr *RetE, const Expr *TrackingE = nullptr) { ExplodedNode *N = C.generateErrorNode(); if (!N) @@ -92,9 +92,7 @@ static void emitBug(CheckerContext &C, BugType &BT, StringRef Msg, } void ReturnUndefChecker::emitUndef(CheckerContext &C, const Expr *RetE) const { - if (!BT_Undef) - BT_Undef.reset(new BugType(this, "Garbage return value")); - emitBug(C, *BT_Undef, "Undefined or garbage value returned to caller", RetE); + emitBug(C, BT_Undef, "Undefined or garbage value returned to caller", RetE); } void ReturnUndefChecker::checkReference(CheckerContext &C, const Expr *RetE, @@ -109,10 +107,7 @@ void ReturnUndefChecker::checkReference(CheckerContext &C, const Expr *RetE, } // The return value is known to be null. Emit a bug report. 
diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp
index 78cd0100bea4..efffbf2ee755 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp
@@ -24,8 +24,8 @@ using namespace ento;
 
 namespace {
 class ReturnUndefChecker : public Checker< check::PreStmt<ReturnStmt> > {
-  mutable std::unique_ptr<BugType> BT_Undef;
-  mutable std::unique_ptr<BugType> BT_NullReference;
+  const BugType BT_Undef{this, "Garbage return value"};
+  const BugType BT_NullReference{this, "Returning null reference"};
 
   void emitUndef(CheckerContext &C, const Expr *RetE) const;
   void checkReference(CheckerContext &C, const Expr *RetE,
@@ -77,7 +77,7 @@ void ReturnUndefChecker::checkPreStmt(const ReturnStmt *RS,
   }
 }
 
-static void emitBug(CheckerContext &C, BugType &BT, StringRef Msg,
+static void emitBug(CheckerContext &C, const BugType &BT, StringRef Msg,
                     const Expr *RetE, const Expr *TrackingE = nullptr) {
   ExplodedNode *N = C.generateErrorNode();
   if (!N)
@@ -92,9 +92,7 @@ static void emitBug(CheckerContext &C, BugType &BT, StringRef Msg,
 }
 
 void ReturnUndefChecker::emitUndef(CheckerContext &C, const Expr *RetE) const {
-  if (!BT_Undef)
-    BT_Undef.reset(new BugType(this, "Garbage return value"));
-  emitBug(C, *BT_Undef, "Undefined or garbage value returned to caller", RetE);
+  emitBug(C, BT_Undef, "Undefined or garbage value returned to caller", RetE);
 }
 
 void ReturnUndefChecker::checkReference(CheckerContext &C, const Expr *RetE,
@@ -109,10 +107,7 @@ void ReturnUndefChecker::checkReference(CheckerContext &C, const Expr *RetE,
   }
 
   // The return value is known to be null. Emit a bug report.
-  if (!BT_NullReference)
-    BT_NullReference.reset(new BugType(this, "Returning null reference"));
-
-  emitBug(C, *BT_NullReference, BT_NullReference->getDescription(), RetE,
+  emitBug(C, BT_NullReference, BT_NullReference.getDescription(), RetE,
           bugreporter::getDerefExpr(RetE));
 }
diff --git a/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
index 2ac9f65c9793..7cbe271dfbf9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
@@ -52,10 +52,13 @@ class SimpleStreamChecker : public Checker<check::PostCall,
                                            check::PreCall,
                                            check::DeadSymbols,
                                            check::PointerEscape> {
-  CallDescription OpenFn, CloseFn;
+  const CallDescription OpenFn{{"fopen"}, 2};
+  const CallDescription CloseFn{{"fclose"}, 1};
 
-  std::unique_ptr<BugType> DoubleCloseBugType;
-  std::unique_ptr<BugType> LeakBugType;
+  const BugType DoubleCloseBugType{this, "Double fclose",
+                                   "Unix Stream API Error"};
+  const BugType LeakBugType{this, "Resource Leak", "Unix Stream API Error",
+                            /*SuppressOnSink=*/true};
 
   void reportDoubleClose(SymbolRef FileDescSym,
                          const CallEvent &Call,
@@ -67,8 +70,6 @@ class SimpleStreamChecker : public Checker<check::PostCall,
   bool guaranteedNotToCloseFile(const CallEvent &Call) const;
 
 public:
-  SimpleStreamChecker();
-
   /// Process fopen.
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
   /// Process fclose.
@@ -89,18 +90,6 @@ public:
 /// state. Let's store it in the ProgramState.
 REGISTER_MAP_WITH_PROGRAMSTATE(StreamMap, SymbolRef, StreamState)
 
-SimpleStreamChecker::SimpleStreamChecker()
-    : OpenFn({"fopen"}, 2), CloseFn({"fclose"}, 1) {
-  // Initialize the bug types.
-  DoubleCloseBugType.reset(
-      new BugType(this, "Double fclose", "Unix Stream API Error"));
-
-  // Sinks are higher importance bugs as well as calls to assert() or exit(0).
-  LeakBugType.reset(
-      new BugType(this, "Resource Leak", "Unix Stream API Error",
-                  /*SuppressOnSink=*/true));
-}
-
 void SimpleStreamChecker::checkPostCall(const CallEvent &Call,
                                         CheckerContext &C) const {
   if (!Call.isGlobalCFunction())
@@ -192,7 +181,7 @@ void SimpleStreamChecker::reportDoubleClose(SymbolRef FileDescSym,
 
   // Generate the report.
   auto R = std::make_unique<PathSensitiveBugReport>(
-      *DoubleCloseBugType, "Closing a previously closed file stream", ErrNode);
+      DoubleCloseBugType, "Closing a previously closed file stream", ErrNode);
   R->addRange(Call.getSourceRange());
   R->markInteresting(FileDescSym);
   C.emitReport(std::move(R));
@@ -205,7 +194,7 @@ void SimpleStreamChecker::reportLeaks(ArrayRef<SymbolRef> LeakedStreams,
   // TODO: Identify the leaked file descriptor.
   for (SymbolRef LeakedStream : LeakedStreams) {
     auto R = std::make_unique<PathSensitiveBugReport>(
-        *LeakBugType, "Opened file is never closed; potential resource leak",
+        LeakBugType, "Opened file is never closed; potential resource leak",
         ErrNode);
     R->markInteresting(LeakedStream);
     C.emitReport(std::move(R));
diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
index fffcaf7ed18f..6560fd239ce6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
@@ -823,7 +823,7 @@ class StdLibraryFunctionsChecker
   using FunctionSummaryMapType = llvm::DenseMap<const FunctionDecl *, Summary>;
   mutable FunctionSummaryMapType FunctionSummaryMap;
 
-  mutable std::unique_ptr<BugType> BT_InvalidArg;
+  const BugType BT_InvalidArg{this, "Function call with invalid argument"};
   mutable bool SummariesInitialized = false;
 
   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
@@ -875,11 +875,7 @@ private:
     VC->describe(ValueConstraint::Violation, Call, C.getState(), Summary,
                  MsgOs);
     Msg[0] = toupper(Msg[0]);
-    if (!BT_InvalidArg)
-      BT_InvalidArg = std::make_unique<BugType>(
-          CheckName, "Function call with invalid argument",
-          categories::LogicError);
-    auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
+    auto R = std::make_unique<PathSensitiveBugReport>(BT_InvalidArg, Msg, N);
 
     for (ArgNo ArgN : VC->getArgsToTrack()) {
       bugreporter::trackExpressionValue(N, Call.getArgExpr(ArgN), *R);
@@ -2244,6 +2240,14 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
             .ArgConstraint(NotNull(ArgNo(0)))
             .ArgConstraint(NotNull(ArgNo(1))));
 
+    // int fflush(FILE *stream);
+    addToFunctionSummaryMap(
+        "fflush", Signature(ArgTypes{FilePtrTy}, RetType{IntTy}),
+        Summary(NoEvalCall)
+            .Case(ReturnsZero, ErrnoMustNotBeChecked, GenericSuccessMsg)
+            .Case({ReturnValueCondition(WithinRange, SingleValue(EOFv))},
+                  ErrnoNEZeroIrrelevant, GenericFailureMsg));
+
     // long ftell(FILE *stream);
     // From 'The Open Group Base Specifications Issue 7, 2018 edition':
     // "The ftell() function shall not change the setting of errno if
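The only behavioral addition in this stretch is the new `fflush` summary in StdLibraryFunctionsChecker: the call returns 0 on success (and errno must not be checked), or EOF on failure (with errno set to some nonzero value the summary does not further constrain). In user code, the contract being modeled looks like this ordinary C-style usage; `flushOrReport` is a hypothetical helper, not analyzer code:

```cpp
#include <cerrno>
#include <cstdio>

// What the new summary models: fflush returns 0 on success or EOF on
// failure; only after a failure is errno meaningful.
static bool flushOrReport(std::FILE *F) {
  if (std::fflush(F) == 0)
    return true;          // success: checking errno here would be a bug
  std::perror("fflush");  // failure: errno was set by the failed call
  return false;
}

int main() {
  std::printf("hello ");
  return flushOrReport(stdout) ? 0 : 1;
}
```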
diff --git a/clang/lib/StaticAnalyzer/Checkers/TaggedUnionModeling.h b/clang/lib/StaticAnalyzer/Checkers/TaggedUnionModeling.h
index 557e8a76506e..6de33da107a3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/TaggedUnionModeling.h
+++ b/clang/lib/StaticAnalyzer/Checkers/TaggedUnionModeling.h
@@ -52,7 +52,7 @@ removeInformationStoredForDeadInstances(const CallEvent &Call,
 
 template <class TypeMap>
 void handleConstructorAndAssignment(const CallEvent &Call, CheckerContext &C,
-                                    const SVal &ThisSVal) {
+                                    SVal ThisSVal) {
   ProgramStateRef State = Call.getState();
 
   if (!State)
diff --git a/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
index 614a2b2e4ec7..acf4e833095b 100644
--- a/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
@@ -23,8 +23,7 @@ using namespace taint;
 
 namespace {
 class TaintTesterChecker : public Checker<check::PostStmt<Expr>> {
-  std::unique_ptr<BugType> BT =
-      std::make_unique<BugType>(this, "Tainted data", "General");
+  const BugType BT{this, "Tainted data", "General"};
 
 public:
   void checkPostStmt(const Expr *E, CheckerContext &C) const;
@@ -39,7 +38,7 @@ void TaintTesterChecker::checkPostStmt(const Expr *E,
 
   if (isTainted(State, E, C.getLocationContext())) {
     if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
-      auto report = std::make_unique<PathSensitiveBugReport>(*BT, "tainted", N);
+      auto report = std::make_unique<PathSensitiveBugReport>(BT, "tainted", N);
      report->addRange(E->getSourceRange());
       C.emitReport(std::move(report));
     }
diff --git a/clang/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
index 5cdcc1075f44..667b19f8120e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
@@ -78,7 +78,7 @@ public:
 class TestAfterDivZeroChecker
     : public Checker<check::PreStmt<BinaryOperator>, check::BranchCondition,
                      check::EndFunction> {
-  mutable std::unique_ptr<BugType> DivZeroBug;
+  const BugType DivZeroBug{this, "Division by zero"};
   void reportBug(SVal Val, CheckerContext &C) const;
 
 public:
@@ -165,12 +165,10 @@ bool TestAfterDivZeroChecker::hasDivZeroMap(SVal Var,
 
 void TestAfterDivZeroChecker::reportBug(SVal Val, CheckerContext &C) const {
   if (ExplodedNode *N = C.generateErrorNode(C.getState())) {
-    if (!DivZeroBug)
-      DivZeroBug.reset(new BugType(this, "Division by zero"));
-
     auto R = std::make_unique<PathSensitiveBugReport>(
-        *DivZeroBug, "Value being compared against zero has already been used "
-                     "for division",
+        DivZeroBug,
+        "Value being compared against zero has already been used "
+        "for division",
         N);
 
     R->addVisitor(std::make_unique<DivisionBRVisitor>(Val.getAsSymbol(),
diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
index db886501a162..aa478b69aade 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
@@ -27,7 +27,7 @@ using namespace ento;
 namespace {
 
 class UndefBranchChecker : public Checker<check::BranchCondition> {
-  mutable std::unique_ptr<BugType> BT;
+  const BugType BT{this, "Branch condition evaluates to a garbage value"};
 
   struct FindUndefExpr {
     ProgramStateRef St;
@@ -64,52 +64,47 @@ void UndefBranchChecker::checkBranchCondition(const Stmt *Condition,
   // ObjCForCollection is a loop, but has no actual condition.
   if (isa<ObjCForCollectionStmt>(Condition))
     return;
-  SVal X = Ctx.getSVal(Condition);
-  if (X.isUndef()) {
-    // Generate a sink node, which implicitly marks both outgoing branches as
-    // infeasible.
-    ExplodedNode *N = Ctx.generateErrorNode();
-    if (N) {
-      if (!BT)
-        BT.reset(
-            new BugType(this, "Branch condition evaluates to a garbage value"));
-
-      // What's going on here: we want to highlight the subexpression of the
-      // condition that is the most likely source of the "uninitialized
-      // branch condition."  We do a recursive walk of the condition's
-      // subexpressions and roughly look for the most nested subexpression
-      // that binds to Undefined.  We then highlight that expression's range.
-
-      // Get the predecessor node and check if is a PostStmt with the Stmt
-      // being the terminator condition.  We want to inspect the state
-      // of that node instead because it will contain main information about
-      // the subexpressions.
-
-      // Note: any predecessor will do.  They should have identical state,
-      // since all the BlockEdge did was act as an error sink since the value
-      // had to already be undefined.
-      assert (!N->pred_empty());
-      const Expr *Ex = cast<Expr>(Condition);
-      ExplodedNode *PrevN = *N->pred_begin();
-      ProgramPoint P = PrevN->getLocation();
-      ProgramStateRef St = N->getState();
-
-      if (std::optional<PostStmt> PS = P.getAs<PostStmt>())
-        if (PS->getStmt() == Ex)
-          St = PrevN->getState();
-
-      FindUndefExpr FindIt(St, Ctx.getLocationContext());
-      Ex = FindIt.FindExpr(Ex);
-
-      // Emit the bug report.
-      auto R = std::make_unique<PathSensitiveBugReport>(
-          *BT, BT->getDescription(), N);
-      bugreporter::trackExpressionValue(N, Ex, *R);
-      R->addRange(Ex->getSourceRange());
-
-      Ctx.emitReport(std::move(R));
-    }
-  }
+  if (!Ctx.getSVal(Condition).isUndef())
+    return;
+
+  // Generate a sink node, which implicitly marks both outgoing branches as
+  // infeasible.
+  ExplodedNode *N = Ctx.generateErrorNode();
+  if (!N)
+    return;
+  // What's going on here: we want to highlight the subexpression of the
+  // condition that is the most likely source of the "uninitialized
+  // branch condition."  We do a recursive walk of the condition's
+  // subexpressions and roughly look for the most nested subexpression
+  // that binds to Undefined.  We then highlight that expression's range.
+
+  // Get the predecessor node and check if is a PostStmt with the Stmt
+  // being the terminator condition.  We want to inspect the state
+  // of that node instead because it will contain main information about
+  // the subexpressions.
+
+  // Note: any predecessor will do.  They should have identical state,
+  // since all the BlockEdge did was act as an error sink since the value
+  // had to already be undefined.
+  assert(!N->pred_empty());
+  const Expr *Ex = cast<Expr>(Condition);
+  ExplodedNode *PrevN = *N->pred_begin();
+  ProgramPoint P = PrevN->getLocation();
+  ProgramStateRef St = N->getState();
+
+  if (std::optional<PostStmt> PS = P.getAs<PostStmt>())
+    if (PS->getStmt() == Ex)
+      St = PrevN->getState();
+
+  FindUndefExpr FindIt(St, Ctx.getLocationContext());
+  Ex = FindIt.FindExpr(Ex);
+
+  // Emit the bug report.
+  auto R = std::make_unique<PathSensitiveBugReport>(BT, BT.getDescription(), N);
+  bugreporter::trackExpressionValue(N, Ex, *R);
+  R->addRange(Ex->getSourceRange());
+
+  Ctx.emitReport(std::move(R));
 }
SmallString<128> buf; llvm::raw_svector_ostream os(buf); @@ -81,7 +77,7 @@ UndefCapturedBlockVarChecker::checkPostStmt(const BlockExpr *BE, os << "Variable '" << VD->getName() << "' is uninitialized when captured by block"; - auto R = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N); if (const Expr *Ex = FindBlockDeclRefExpr(BE->getBody(), VD)) R->addRange(Ex->getSourceRange()); bugreporter::trackStoredValue(*V, VR, *R, diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp index d593a6bd74cf..4b845bb3ded2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp @@ -28,7 +28,7 @@ namespace { class UndefResultChecker : public Checker< check::PostStmt<BinaryOperator> > { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Result of operation is garbage or undefined"}; public: void checkPostStmt(const BinaryOperator *B, CheckerContext &C) const; @@ -74,10 +74,6 @@ void UndefResultChecker::checkPostStmt(const BinaryOperator *B, if (!N) return; - if (!BT) - BT.reset( - new BugType(this, "Result of operation is garbage or undefined")); - SmallString<256> sbuf; llvm::raw_svector_ostream OS(sbuf); const Expr *Ex = nullptr; @@ -104,7 +100,7 @@ void UndefResultChecker::checkPostStmt(const BinaryOperator *B, << BinaryOperator::getOpcodeStr(B->getOpcode()) << "' expression is undefined"; } - auto report = std::make_unique<PathSensitiveBugReport>(*BT, OS.str(), N); + auto report = std::make_unique<PathSensitiveBugReport>(BT, OS.str(), N); if (Ex) { report->addRange(Ex->getSourceRange()); bugreporter::trackExpressionValue(N, Ex, *report); diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp index a6cc8cac8c99..baa07fa66764 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp @@ -24,7 +24,7 @@ using namespace ento; namespace { class UndefinedArraySubscriptChecker : public Checker< check::PreStmt<ArraySubscriptExpr> > { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Array subscript is undefined"}; public: void checkPreStmt(const ArraySubscriptExpr *A, CheckerContext &C) const; @@ -48,11 +48,8 @@ UndefinedArraySubscriptChecker::checkPreStmt(const ArraySubscriptExpr *A, ExplodedNode *N = C.generateErrorNode(); if (!N) return; - if (!BT) - BT.reset(new BugType(this, "Array subscript is undefined")); - // Generate a report for this bug. 
- auto R = std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, BT.getDescription(), N); R->addRange(A->getIdx()->getSourceRange()); bugreporter::trackExpressionValue(N, A->getIdx(), *R); C.emitReport(std::move(R)); diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp index 49ac94f65dd0..ddc6cc9e8202 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp @@ -23,7 +23,7 @@ using namespace ento; namespace { class UndefinedAssignmentChecker : public Checker<check::Bind> { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Assigned value is garbage or undefined"}; public: void checkBind(SVal location, SVal val, const Stmt *S, @@ -49,11 +49,6 @@ void UndefinedAssignmentChecker::checkBind(SVal location, SVal val, if (!N) return; - static const char *const DefaultMsg = - "Assigned value is garbage or undefined"; - if (!BT) - BT.reset(new BugType(this, DefaultMsg)); - // Generate a report for this bug. llvm::SmallString<128> Str; llvm::raw_svector_ostream OS(Str); @@ -105,9 +100,9 @@ void UndefinedAssignmentChecker::checkBind(SVal location, SVal val, } if (OS.str().empty()) - OS << DefaultMsg; + OS << BT.getDescription(); - auto R = std::make_unique<PathSensitiveBugReport>(*BT, OS.str(), N); + auto R = std::make_unique<PathSensitiveBugReport>(BT, OS.str(), N); if (ex) { R->addRange(ex->getSourceRange()); bugreporter::trackExpressionValue(N, ex, *R); diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h index 2fcdd6086309..e35778e6480c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObject.h @@ -299,7 +299,7 @@ private: bool isDereferencableUninit(const FieldRegion *FR, FieldChainInfo LocalChain); /// Returns true if the value of a primitive object is uninitialized. - bool isPrimitiveUninit(const SVal &V); + bool isPrimitiveUninit(SVal V); // Note that we don't have a method for arrays -- the elements of an array are // often left uninitialized intentionally even when it is of a C++ record diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp index 3647c49cf3f9..6e1222fedad3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedObjectChecker.cpp @@ -38,15 +38,12 @@ namespace { class UninitializedObjectChecker : public Checker<check::EndFunction, check::DeadSymbols> { - std::unique_ptr<BugType> BT_uninitField; + const BugType BT_uninitField{this, "Uninitialized fields"}; public: // The fields of this struct will be initialized when registering the checker. 
UninitObjCheckerOptions Opts; - UninitializedObjectChecker() - : BT_uninitField(new BugType(this, "Uninitialized fields")) {} - void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; }; @@ -186,7 +183,7 @@ void UninitializedObjectChecker::checkEndFunction( for (const auto &Pair : UninitFields) { auto Report = std::make_unique<PathSensitiveBugReport>( - *BT_uninitField, Pair.second, Node, LocUsedForUniqueing, + BT_uninitField, Pair.second, Node, LocUsedForUniqueing, Node->getLocationContext()->getDecl()); Context.emitReport(std::move(Report)); } @@ -200,7 +197,7 @@ void UninitializedObjectChecker::checkEndFunction( << " at the end of the constructor call"; auto Report = std::make_unique<PathSensitiveBugReport>( - *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing, + BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing, Node->getLocationContext()->getDecl()); for (const auto &Pair : UninitFields) { @@ -379,7 +376,7 @@ bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) { return false; } -bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) { +bool FindUninitializedFields::isPrimitiveUninit(SVal V) { if (V.isUndef()) return true; diff --git a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp index f503b3e88bb3..b05ce610067c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/Basic/TargetInfo.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" @@ -39,7 +40,9 @@ enum class OpenVariant { namespace { class UnixAPIMisuseChecker : public Checker< check::PreStmt<CallExpr> > { - mutable std::unique_ptr<BugType> BT_open, BT_pthreadOnce; + const BugType BT_open{this, "Improper use of 'open'", categories::UnixAPI}; + const BugType BT_pthreadOnce{this, "Improper use of 'pthread_once'", + categories::UnixAPI}; mutable std::optional<uint64_t> Val_O_CREAT; public: @@ -64,7 +67,9 @@ public: void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; private: - mutable std::unique_ptr<BugType> BT_mallocZero; + const BugType BT_mallocZero{ + this, "Undefined allocation of 0 bytes (CERT MEM04-C; CWE-131)", + categories::UnixAPI}; void CheckCallocZero(CheckerContext &C, const CallExpr *CE) const; void CheckMallocZero(CheckerContext &C, const CallExpr *CE) const; @@ -87,14 +92,6 @@ private: } //end anonymous namespace -static void LazyInitialize(const CheckerBase *Checker, - std::unique_ptr<BugType> &BT, - const char *name) { - if (BT) - return; - BT.reset(new BugType(Checker, name, categories::UnixAPI)); -} - //===----------------------------------------------------------------------===// // "open" (man 2 open) //===----------------------------------------------------------------------===/ @@ -132,9 +129,7 @@ void UnixAPIMisuseChecker::ReportOpenBug(CheckerContext &C, if (!N) return; - LazyInitialize(this, BT_open, "Improper use of 'open'"); - - auto 
Report = std::make_unique<PathSensitiveBugReport>(*BT_open, Msg, N); + auto Report = std::make_unique<PathSensitiveBugReport>(BT_open, Msg, N); Report->addRange(SR); C.emitReport(std::move(Report)); } @@ -301,10 +296,8 @@ void UnixAPIMisuseChecker::CheckPthreadOnce(CheckerContext &C, if (isa<VarRegion>(R) && isa<StackLocalsSpaceRegion>(R->getMemorySpace())) os << " Perhaps you intended to declare the variable as 'static'?"; - LazyInitialize(this, BT_pthreadOnce, "Improper use of 'pthread_once'"); - auto report = - std::make_unique<PathSensitiveBugReport>(*BT_pthreadOnce, os.str(), N); + std::make_unique<PathSensitiveBugReport>(BT_pthreadOnce, os.str(), N); report->addRange(CE->getArg(0)->getSourceRange()); C.emitReport(std::move(report)); } @@ -341,14 +334,11 @@ bool UnixAPIPortabilityChecker::ReportZeroByteAllocation( if (!N) return false; - LazyInitialize(this, BT_mallocZero, - "Undefined allocation of 0 bytes (CERT MEM04-C; CWE-131)"); - SmallString<256> S; llvm::raw_svector_ostream os(S); os << "Call to '" << fn_name << "' has an allocation size of 0 bytes"; auto report = - std::make_unique<PathSensitiveBugReport>(*BT_mallocZero, os.str(), N); + std::make_unique<PathSensitiveBugReport>(BT_mallocZero, os.str(), N); report->addRange(arg->getSourceRange()); bugreporter::trackExpressionValue(N, arg, *report); diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp index 1d03d1656b3c..d76fe4991869 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -34,8 +34,10 @@ namespace { class VLASizeChecker : public Checker<check::PreStmt<DeclStmt>, check::PreStmt<UnaryExprOrTypeTraitExpr>> { - mutable std::unique_ptr<BugType> BT; - mutable std::unique_ptr<BugType> TaintBT; + const BugType BT{this, "Dangerous variable-length array (VLA) declaration"}; + const BugType TaintBT{this, + "Dangerous variable-length array (VLA) declaration", + categories::TaintedData}; enum VLASize_Kind { VLA_Garbage, VLA_Zero, VLA_Negative, VLA_Overflow }; /// Check a VLA for validity. 
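The static-analyzer checker hunks above all apply one mechanical refactor: a lazily allocated `mutable std::unique_ptr<BugType>` member becomes an eagerly constructed `const BugType`, so every report site drops its null check and passes the bug type by const reference. A minimal stand-alone sketch of the before/after idiom follows; the BugType here is a hypothetical stand-in, not clang's real class (whose constructor also takes the owning checker and a category):

#include <cstdio>
#include <memory>
#include <string>

// Hypothetical stand-in for clang's BugType; only its construction and
// const-ness matter for this sketch.
struct BugType {
  std::string Description;
  explicit BugType(std::string Desc) : Description(std::move(Desc)) {}
};

// Before: lazy initialization needs a mutable member and a null check on
// every report path.
class LazyChecker {
  mutable std::unique_ptr<BugType> BT;

public:
  const BugType &bugType() const {
    if (!BT)
      BT.reset(new BugType("Division by zero"));
    return *BT;
  }
};

// After: the bug type is an immutable part of the checker itself, so report
// sites can use it directly, with no branching and no heap allocation.
class EagerChecker {
  const BugType BT{"Division by zero"};

public:
  const BugType &bugType() const { return BT; }
};

int main() {
  LazyChecker L;
  EagerChecker E;
  std::printf("%s / %s\n", L.bugType().Description.c_str(),
              E.bugType().Description.c_str());
}

Note how the VLASizeChecker hunk above also drops the explicit categories::LogicError argument for BT: that is BugType's default category, so only the TaintedData bug type still needs to name one.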
@@ -213,17 +215,12 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, ProgramStateRef State, if (!N) return; - if (!TaintBT) - TaintBT.reset( - new BugType(this, "Dangerous variable-length array (VLA) declaration", - categories::TaintedData)); - SmallString<256> buf; llvm::raw_svector_ostream os(buf); os << "Declared variable-length array (VLA) "; os << "has tainted size"; - auto report = std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), N); + auto report = std::make_unique<PathSensitiveBugReport>(TaintBT, os.str(), N); report->addRange(SizeE->getSourceRange()); bugreporter::trackExpressionValue(N, SizeE, *report); // The vla size may be a complex expression where multiple memory locations @@ -240,11 +237,6 @@ void VLASizeChecker::reportBug(VLASize_Kind Kind, const Expr *SizeE, if (!N) return; - if (!BT) - BT.reset(new BugType(this, - "Dangerous variable-length array (VLA) declaration", - categories::LogicError)); - SmallString<256> buf; llvm::raw_svector_ostream os(buf); os << "Declared variable-length array (VLA) "; @@ -263,7 +255,7 @@ void VLASizeChecker::reportBug(VLASize_Kind Kind, const Expr *SizeE, break; } - auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + auto report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N); report->addRange(SizeE->getSourceRange()); bugreporter::trackExpressionValue(N, SizeE, *report); C.emitReport(std::move(report)); diff --git a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp index 8a1e02748c9b..cb73ac68edd1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp @@ -44,7 +44,7 @@ namespace { class VforkChecker : public Checker<check::PreCall, check::PostCall, check::Bind, check::PreStmt<ReturnStmt>> { - mutable std::unique_ptr<BugType> BT; + const BugType BT{this, "Dangerous construct in a vforked process"}; mutable llvm::SmallSet<const IdentifierInfo *, 10> VforkAllowlist; mutable const IdentifierInfo *II_vfork = nullptr; @@ -123,9 +123,6 @@ bool VforkChecker::isCallExplicitelyAllowed(const IdentifierInfo *II, void VforkChecker::reportBug(const char *What, CheckerContext &C, const char *Details) const { if (ExplodedNode *N = C.generateErrorNode(C.getState())) { - if (!BT) - BT.reset(new BugType(this, "Dangerous construct in a vforked process")); - SmallString<256> buf; llvm::raw_svector_ostream os(buf); @@ -134,7 +131,7 @@ void VforkChecker::reportBug(const char *What, CheckerContext &C, if (Details) os << "; " << Details; - auto Report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N); + auto Report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N); // TODO: mark vfork call in BugReportVisitor C.emitReport(std::move(Report)); } diff --git a/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp b/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp index e9cc080caf5f..66fab523c864 100644 --- a/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp +++ b/clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp @@ -13,6 +13,7 @@ namespace clang { namespace ento { namespace categories { +const char *const AppleAPIMisuse = "API Misuse (Apple)"; const char *const CoreFoundationObjectiveC = "Core Foundation/Objective-C"; const char *const LogicError = "Logic error"; const char *const MemoryRefCount = diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp index 0102f743c911..4f989ed59bee 100644 --- 
a/clang/lib/StaticAnalyzer/Core/Environment.cpp +++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp @@ -193,7 +193,7 @@ EnvironmentManager::removeDeadBindings(Environment Env, // Iterate over the block-expr bindings. for (Environment::iterator I = Env.begin(), End = Env.end(); I != End; ++I) { const EnvironmentEntry &BlkExpr = I.getKey(); - const SVal &X = I.getData(); + SVal X = I.getData(); const Expr *E = dyn_cast<Expr>(BlkExpr.getStmt()); if (!E) diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index c773cef30d5e..da9a1a1a4d1f 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -2016,7 +2016,7 @@ std::optional<SVal> RegionStoreManager::getBindingForDerivedDefaultValue( const TypedValueRegion *R, QualType Ty) { if (const std::optional<SVal> &D = B.getDefaultBinding(superR)) { - const SVal &val = *D; + SVal val = *D; if (SymbolRef parentSym = val.getAsSymbol()) return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R); @@ -2331,7 +2331,7 @@ bool RegionStoreManager::includedInBindings(Store store, const ClusterBindings &Cluster = RI.getData(); for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end(); CI != CE; ++CI) { - const SVal &D = CI.getData(); + SVal D = CI.getData(); if (const MemRegion *R = D.getAsRegion()) if (R->getBaseRegion() == region) return true; @@ -2500,7 +2500,7 @@ RegionStoreManager::bindArray(RegionBindingsConstRef B, if (VI == VE) break; - const NonLoc &Idx = svalBuilder.makeArrayIndex(i); + NonLoc Idx = svalBuilder.makeArrayIndex(i); const ElementRegion *ER = MRMgr.getElementRegion(ElementTy, Idx, R, Ctx); if (ElementTy->isStructureOrClassType()) diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp index bf47461b59e0..2de977a3dc72 100644 --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -203,7 +203,7 @@ void RVVType::initBuiltinStr() { } break; case ScalarTypeKind::BFloat: - BuiltinStr += "b"; + BuiltinStr += "y"; break; default: llvm_unreachable("ScalarType is invalid!"); diff --git a/clang/lib/Tooling/Refactoring/Lookup.cpp b/clang/lib/Tooling/Refactoring/Lookup.cpp index 52799f16fab2..757fba0404e6 100644 --- a/clang/lib/Tooling/Refactoring/Lookup.cpp +++ b/clang/lib/Tooling/Refactoring/Lookup.cpp @@ -98,8 +98,8 @@ static StringRef getBestNamespaceSubstr(const DeclContext *DeclA, // from NewName if it has an identical prefix. std::string NS = "::" + cast<NamespaceDecl>(DeclA)->getQualifiedNameAsString() + "::"; - if (NewName.starts_with(NS)) - return NewName.substr(NS.size()); + if (NewName.consume_front(NS)) + return NewName; // No match yet. Strip of a namespace from the end of the chain and try // again. This allows to get optimal qualifications even if the old and new diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index 33bfa8d3d81f..d192c7f42939 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -255,9 +255,7 @@ llvm::Expected<std::string> getAbsolutePath(llvm::vfs::FileSystem &FS, StringRef File) { StringRef RelativePath(File); // FIXME: Should '.\\' be accepted on Win32? 
- if (RelativePath.starts_with("./")) { - RelativePath = RelativePath.substr(strlen("./")); - } + RelativePath.consume_front("./"); SmallString<1024> AbsolutePath = RelativePath; if (auto EC = FS.makeAbsolute(AbsolutePath)) diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index d2e3d8d43aef..be34dbbe886a 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -12,6 +12,7 @@ /// //===----------------------------------------------------------------------===// +#include "../../lib/Format/MatchFilePath.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileManager.h" @@ -570,6 +571,69 @@ static int dumpConfig(bool IsSTDIN) { return 0; } +// Check whether `FilePath` is ignored according to the nearest +// .clang-format-ignore file based on the rules below: +// - A blank line is skipped. +// - Leading and trailing spaces of a line are trimmed. +// - A line starting with a hash (`#`) is a comment. +// - A non-comment line is a single pattern. +// - The slash (`/`) is used as the directory separator. +// - A pattern is relative to the directory of the .clang-format-ignore file (or +// the root directory if the pattern starts with a slash). +// - A pattern is negated if it starts with a bang (`!`). +static bool isIgnored(StringRef FilePath) { + using namespace llvm::sys::fs; + if (!is_regular_file(FilePath)) + return false; + + using namespace llvm::sys::path; + SmallString<128> Path, AbsPath{FilePath}; + + make_absolute(AbsPath); + remove_dots(AbsPath, /*remove_dot_dot=*/true); + + StringRef IgnoreDir{AbsPath}; + do { + IgnoreDir = parent_path(IgnoreDir); + if (IgnoreDir.empty()) + return false; + + Path = IgnoreDir; + append(Path, ".clang-format-ignore"); + } while (!is_regular_file(Path)); + + std::ifstream IgnoreFile{Path.c_str()}; + if (!IgnoreFile.good()) + return false; + + const auto Pathname = convert_to_slash(AbsPath); + for (std::string Line; std::getline(IgnoreFile, Line);) { + auto Pattern = StringRef(Line).trim(); + if (Pattern.empty() || Pattern[0] == '#') + continue; + + const bool IsNegated = Pattern[0] == '!'; + if (IsNegated) + Pattern = Pattern.drop_front(); + + if (Pattern.empty()) + continue; + + Pattern = Pattern.ltrim(); + if (Pattern[0] != '/') { + Path = convert_to_slash(IgnoreDir); + append(Path, Style::posix, Pattern); + remove_dots(Path, /*remove_dot_dot=*/true, Style::posix); + Pattern = Path.str(); + } + + if (clang::format::matchFilePath(Pattern, Pathname) == !IsNegated) + return true; + } + + return false; +} + int main(int argc, const char **argv) { llvm::InitLLVM X(argc, argv); @@ -618,11 +682,14 @@ int main(int argc, const char **argv) { unsigned FileNo = 1; bool Error = false; for (const auto &FileName : FileNames) { + const bool IsSTDIN = FileName == "-"; + if (!IsSTDIN && isIgnored(FileName)) + continue; if (Verbose) { errs() << "Formatting [" << FileNo++ << "/" << FileNames.size() << "] " << FileName << "\n"; } - Error |= clang::format::format(FileName, FileName == "-"); + Error |= clang::format::format(FileName, IsSTDIN); } return Error ? 
1 : 0; } diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index da2a885ce851..d570bcae8d86 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -151,7 +151,7 @@ static BasicType ParseBasicType(char c) { case 'd': return BasicType::Float64; break; - case 'b': + case 'y': return BasicType::BFloat16; break; default: diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 311c6b09dc79..6c302da106a2 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1603,6 +1603,25 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << "extern \"C\" {\n"; OS << "#endif\n\n"; + OS << "void __arm_za_disable(void) __arm_streaming_compatible;\n\n"; + + OS << "__ai bool __arm_has_sme(void) __arm_streaming_compatible {\n"; + OS << " uint64_t x0, x1;\n"; + OS << " __builtin_arm_get_sme_state(&x0, &x1);\n"; + OS << " return x0 & (1ULL << 63);\n"; + OS << "}\n\n"; + + OS << "__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible " + "{\n"; + OS << " uint64_t x0, x1;\n"; + OS << " __builtin_arm_get_sme_state(&x0, &x1);\n"; + OS << " return x0 & 1;\n"; + OS << "}\n\n"; + + OS << "__ai __attribute__((target(\"sme\"))) void svundef_za(void) " + "__arm_streaming_compatible __arm_shared_za " + "{ }\n\n"; + createCoreHeaderIntrinsics(OS, *this, ACLEKind::SME); OS << "#ifdef __cplusplus\n"; diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index e19b4479aaf3..37d3bad1b1ec 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -33,7 +33,6 @@ # include "asan_premap_shadow.h" # include "asan_thread.h" # include "sanitizer_common/sanitizer_flags.h" -# include "sanitizer_common/sanitizer_freebsd.h" # include "sanitizer_common/sanitizer_hash.h" # include "sanitizer_common/sanitizer_libc.h" # include "sanitizer_common/sanitizer_procmaps.h" @@ -59,13 +58,6 @@ extern Elf_Dyn _DYNAMIC; extern ElfW(Dyn) _DYNAMIC[]; # endif -// x86-64 FreeBSD 9.2 and older define 'ucontext_t' incorrectly in -// 32-bit mode. -# if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32) && \ - __FreeBSD_version <= 902001 // v9.2 -# define ucontext_t xucontext_t -# endif - typedef enum { ASAN_RT_VERSION_UNDEFINED = 0, ASAN_RT_VERSION_DYNAMIC, @@ -148,6 +140,11 @@ static int FindFirstDSOCallback(struct dl_phdr_info *info, size_t size, internal_strncmp(info->dlpi_name, "linux-", sizeof("linux-") - 1) == 0) return 0; # endif +# if SANITIZER_FREEBSD + // Ignore vDSO. + if (internal_strcmp(info->dlpi_name, "[vdso]") == 0) + return 0; +# endif *name = info->dlpi_name; return 1; diff --git a/compiler-rt/lib/asan/asan_new_delete.cpp b/compiler-rt/lib/asan/asan_new_delete.cpp index 17280129c758..b5b1ced8ac5e 100644 --- a/compiler-rt/lib/asan/asan_new_delete.cpp +++ b/compiler-rt/lib/asan/asan_new_delete.cpp @@ -48,15 +48,6 @@ COMMENT_EXPORT("??_V@YAXPAX@Z") // operator delete[] using namespace __asan; -// FreeBSD prior v9.2 have wrong definition of 'size_t'. -// http://svnweb.freebsd.org/base?view=revision&revision=232261 -#if SANITIZER_FREEBSD && SANITIZER_WORDSIZE == 32 -#include <sys/param.h> -#if __FreeBSD_version <= 902001 // v9.2 -#define size_t unsigned -#endif // __FreeBSD_version -#endif // SANITIZER_FREEBSD && SANITIZER_WORDSIZE == 32 - // This code has issues on OSX. // See https://github.com/google/sanitizers/issues/131. 
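Two of the clang-side hunks above (Tooling/Refactoring/Lookup.cpp and Tooling/Tooling.cpp) replace a starts_with test followed by a manual substr with StringRef::consume_front, which tests for and strips the prefix in one call. A compilable sketch of the equivalence against llvm::StringRef; the helper names are invented for illustration:

#include "llvm/ADT/StringRef.h"
#include <cstdio>

// Pre-patch spelling: test for the prefix, then drop it by hand.
static llvm::StringRef stripDotSlashOld(llvm::StringRef Path) {
  if (Path.starts_with("./"))
    Path = Path.substr(2); // strlen("./")
  return Path;
}

// Post-patch spelling: consume_front does the test and the drop in one call,
// returning false (and leaving Path untouched) if the prefix is absent.
static llvm::StringRef stripDotSlashNew(llvm::StringRef Path) {
  Path.consume_front("./");
  return Path;
}

int main() {
  std::printf("%s\n", stripDotSlashOld("./a/b.c").str().c_str()); // a/b.c
  std::printf("%s\n", stripDotSlashNew("./a/b.c").str().c_str()); // a/b.c
  std::printf("%s\n", stripDotSlashNew("a/b.c").str().c_str());   // a/b.c
}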
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index 8e85de2218f7..44e1cf49d1e9 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -34,6 +34,9 @@ _Bool __aarch64_have_lse_atomics __attribute__((visibility("hidden"), nocommon)) = false; #if defined(__FreeBSD__) +// clang-format off: should not reorder sys/auxv.h alphabetically +#include <sys/auxv.h> +// clang-format on #include "aarch64/hwcap.inc" #include "aarch64/lse_atomics/freebsd.inc" #elif defined(__Fuchsia__) diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index c6a917715e12..9d9a5d3f1542 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -676,7 +676,7 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, case 25: CPU = "znver3"; *Type = AMDFAM19H; - if ((Model >= 0x00 && Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || + if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || (Model >= 0x50 && Model <= 0x5f)) { // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h index 43bbdd5f8736..af406e760497 100644 --- a/compiler-rt/lib/builtins/fp_lib.h +++ b/compiler-rt/lib/builtins/fp_lib.h @@ -26,18 +26,6 @@ #include <stdbool.h> #include <stdint.h> -// x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in -// 32-bit mode. -#if defined(__FreeBSD__) && defined(__i386__) -#include <sys/param.h> -#if __FreeBSD_version < 903000 // v9.3 -#define uint64_t unsigned long long -#define int64_t long long -#undef UINT64_C -#define UINT64_C(c) (c##ULL) -#endif -#endif - #if defined SINGLE_PRECISION typedef uint16_t half_rep_t; diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h index 18bf0a7f3bf9..7624c7280615 100644 --- a/compiler-rt/lib/builtins/int_types.h +++ b/compiler-rt/lib/builtins/int_types.h @@ -139,7 +139,6 @@ typedef union { udwords u; double f; } double_bits; -#endif typedef struct { #if _YUGA_LITTLE_ENDIAN @@ -220,7 +219,6 @@ typedef union { #define CRT_HAS_TF_MODE #endif -#if CRT_HAS_FLOATING_POINT #if __STDC_VERSION__ >= 199901L typedef float _Complex Fcomplex; typedef double _Complex Dcomplex; @@ -270,5 +268,5 @@ typedef struct { #define COMPLEXTF_IMAGINARY(x) (x).imaginary #endif -#endif +#endif // CRT_HAS_FLOATING_POINT #endif // INT_TYPES_H diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index 5b3a99adfea7..1a018a891b56 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -260,16 +260,15 @@ static void PrintStackAllocations(const StackAllocationsRingBuffer *sa, Printf("Cause: %s\n", cause); Printf("%s", d.Default()); Printf("%s", d.Location()); - Printf("%p is located %zd bytes %s a %zd-byte region [%p,%p)\n", - untagged_addr, offset, whence, local_end - local_beg, local_beg, - local_end); - Printf("%s", d.Allocation()); StackTracePrinter::GetOrInit()->RenderSourceLocation( &location, local.decl_file, local.decl_line, /* column= */ 0, common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix); - Printf(" %s in %s %s\n", local.name, local.function_name, - location.data()); + Printf( + "%p is located %zd bytes %s a %zd-byte local variable %s [%p,%p) " + "in %s %s\n", + untagged_addr, 
offset, whence, local_end - local_beg, local.name, + local_beg, local_end, local.function_name, location.data()); location.clear(); Printf("%s\n", d.Default()); } diff --git a/compiler-rt/lib/memprof/memprof_linux.cpp b/compiler-rt/lib/memprof/memprof_linux.cpp index fcd927023f5c..fcb6f662a82e 100644 --- a/compiler-rt/lib/memprof/memprof_linux.cpp +++ b/compiler-rt/lib/memprof/memprof_linux.cpp @@ -20,7 +20,6 @@ #include "memprof_internal.h" #include "memprof_thread.h" #include "sanitizer_common/sanitizer_flags.h" -#include "sanitizer_common/sanitizer_freebsd.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_procmaps.h" diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index ba4670751697..77fa1b4965a7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -6785,7 +6785,7 @@ INTERCEPTOR(SSIZE_T, sendto, int fd, void *buf, SIZE_T len, int flags, #endif #if SANITIZER_INTERCEPT_EVENTFD_READ_WRITE -INTERCEPTOR(int, eventfd_read, int fd, u64 *value) { +INTERCEPTOR(int, eventfd_read, int fd, __sanitizer_eventfd_t *value) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, eventfd_read, fd, value); COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd); @@ -6796,7 +6796,7 @@ INTERCEPTOR(int, eventfd_read, int fd, u64 *value) { } return res; } -INTERCEPTOR(int, eventfd_write, int fd, u64 value) { +INTERCEPTOR(int, eventfd_write, int fd, __sanitizer_eventfd_t value) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, eventfd_write, fd, value); if (fd >= 0) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h index 8bb8304910c7..d246781fe1df 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h @@ -109,6 +109,10 @@ class TwoLevelMap { return *AddressSpaceView::LoadWritable(&map2[idx % kSize2]); } + void Lock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { mu_.Lock(); } + + void Unlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS { mu_.Unlock(); } + private: constexpr uptr MmapSize() const { return RoundUpTo(kSize2 * sizeof(T), GetPageSizeCached()); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_freebsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_freebsd.h deleted file mode 100644 index 82b227eab6da..000000000000 --- a/compiler-rt/lib/sanitizer_common/sanitizer_freebsd.h +++ /dev/null @@ -1,137 +0,0 @@ -//===-- sanitizer_freebsd.h -------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of Sanitizer runtime. It contains FreeBSD-specific -// definitions. -// -//===----------------------------------------------------------------------===// - -#ifndef SANITIZER_FREEBSD_H -#define SANITIZER_FREEBSD_H - -#include "sanitizer_internal_defs.h" - -// x86-64 FreeBSD 9.2 and older define 'ucontext_t' incorrectly in -// 32-bit mode. 
-#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32) -#include <osreldate.h> -#if __FreeBSD_version <= 902001 // v9.2 -#include <link.h> -#include <sys/param.h> -#include <ucontext.h> - -namespace __sanitizer { - -typedef unsigned long long __xuint64_t; - -typedef __int32_t __xregister_t; - -typedef struct __xmcontext { - __xregister_t mc_onstack; - __xregister_t mc_gs; - __xregister_t mc_fs; - __xregister_t mc_es; - __xregister_t mc_ds; - __xregister_t mc_edi; - __xregister_t mc_esi; - __xregister_t mc_ebp; - __xregister_t mc_isp; - __xregister_t mc_ebx; - __xregister_t mc_edx; - __xregister_t mc_ecx; - __xregister_t mc_eax; - __xregister_t mc_trapno; - __xregister_t mc_err; - __xregister_t mc_eip; - __xregister_t mc_cs; - __xregister_t mc_eflags; - __xregister_t mc_esp; - __xregister_t mc_ss; - - int mc_len; - int mc_fpformat; - int mc_ownedfp; - __xregister_t mc_flags; - - int mc_fpstate[128] __aligned(16); - __xregister_t mc_fsbase; - __xregister_t mc_gsbase; - __xregister_t mc_xfpustate; - __xregister_t mc_xfpustate_len; - - int mc_spare2[4]; -} xmcontext_t; - -typedef struct __xucontext { - sigset_t uc_sigmask; - xmcontext_t uc_mcontext; - - struct __ucontext *uc_link; - stack_t uc_stack; - int uc_flags; - int __spare__[4]; -} xucontext_t; - -struct xkinfo_vmentry { - int kve_structsize; - int kve_type; - __xuint64_t kve_start; - __xuint64_t kve_end; - __xuint64_t kve_offset; - __xuint64_t kve_vn_fileid; - __uint32_t kve_vn_fsid; - int kve_flags; - int kve_resident; - int kve_private_resident; - int kve_protection; - int kve_ref_count; - int kve_shadow_count; - int kve_vn_type; - __xuint64_t kve_vn_size; - __uint32_t kve_vn_rdev; - __uint16_t kve_vn_mode; - __uint16_t kve_status; - int _kve_ispare[12]; - char kve_path[PATH_MAX]; -}; - -typedef struct { - __uint32_t p_type; - __uint32_t p_offset; - __uint32_t p_vaddr; - __uint32_t p_paddr; - __uint32_t p_filesz; - __uint32_t p_memsz; - __uint32_t p_flags; - __uint32_t p_align; -} XElf32_Phdr; - -struct xdl_phdr_info { - Elf_Addr dlpi_addr; - const char *dlpi_name; - const XElf32_Phdr *dlpi_phdr; - Elf_Half dlpi_phnum; - unsigned long long int dlpi_adds; - unsigned long long int dlpi_subs; - size_t dlpi_tls_modid; - void *dlpi_tls_data; -}; - -typedef int (*__xdl_iterate_hdr_callback)(struct xdl_phdr_info *, size_t, - void *); -typedef int xdl_iterate_phdr_t(__xdl_iterate_hdr_callback, void *); - -#define xdl_iterate_phdr(callback, param) \ - (((xdl_iterate_phdr_t *)dl_iterate_phdr)((callback), (param))) - -} // namespace __sanitizer - -#endif // __FreeBSD_version <= 902001 -#endif // SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32) - -#endif // SANITIZER_FREEBSD_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 841d7c096292..5d2dd3a7a658 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -58,7 +58,6 @@ # include <sched.h> # include <signal.h> # include <sys/mman.h> -# include <sys/param.h> # if !SANITIZER_SOLARIS # include <sys/ptrace.h> # endif @@ -136,9 +135,7 @@ const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; # define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0 # endif -// Note : FreeBSD had implemented both -// Linux apis, available from -// future 12.x version most likely +// Note : FreeBSD implemented both Linux and OpenBSD apis. 
# if SANITIZER_LINUX && defined(__NR_getrandom) # if !defined(GRND_NONBLOCK) # define GRND_NONBLOCK 1 @@ -148,10 +145,8 @@ const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG; # define SANITIZER_USE_GETRANDOM 0 # endif // SANITIZER_LINUX && defined(__NR_getrandom) -# if SANITIZER_FREEBSD && __FreeBSD_version >= 1200000 +# if SANITIZER_FREEBSD # define SANITIZER_USE_GETENTROPY 1 -# else -# define SANITIZER_USE_GETENTROPY 0 # endif namespace __sanitizer { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 8e942b69e6a7..cccbb4d256df 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -21,7 +21,6 @@ # include "sanitizer_common.h" # include "sanitizer_file.h" # include "sanitizer_flags.h" -# include "sanitizer_freebsd.h" # include "sanitizer_getauxval.h" # include "sanitizer_glibc_version.h" # include "sanitizer_linux.h" @@ -46,7 +45,6 @@ # endif # if SANITIZER_FREEBSD -# include <osreldate.h> # include <pthread_np.h> # include <sys/auxv.h> # include <sys/sysctl.h> @@ -629,11 +627,7 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, # if !SANITIZER_FREEBSD typedef ElfW(Phdr) Elf_Phdr; -# elif SANITIZER_WORDSIZE == 32 && __FreeBSD_version <= 902001 // v9.2 -# define Elf_Phdr XElf32_Phdr -# define dl_phdr_info xdl_phdr_info -# define dl_iterate_phdr(c, b) xdl_iterate_phdr((c), (b)) -# endif // !SANITIZER_FREEBSD +# endif struct DlIteratePhdrData { InternalMmapVectorNoCtor<LoadedModule> *modules; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 8c7c00de6d12..289ae661c343 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -301,7 +301,8 @@ #define SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME (SI_GLIBC || SI_SOLARIS) #define SANITIZER_INTERCEPT_CONFSTR \ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) -#define SANITIZER_INTERCEPT_SCHED_GETAFFINITY SI_LINUX_NOT_ANDROID +#define SANITIZER_INTERCEPT_SCHED_GETAFFINITY \ + (SI_LINUX_NOT_ANDROID || SI_FREEBSD) #define SANITIZER_INTERCEPT_SCHED_GETPARAM SI_LINUX_NOT_ANDROID || SI_SOLARIS #define SANITIZER_INTERCEPT_STRERROR SI_POSIX #define SANITIZER_INTERCEPT_STRERROR_R SI_POSIX @@ -462,7 +463,7 @@ (SI_LINUX || SI_MAC || SI_WINDOWS || SI_FREEBSD || SI_NETBSD || SI_SOLARIS) #define SANITIZER_INTERCEPT_RECV_RECVFROM SI_POSIX #define SANITIZER_INTERCEPT_SEND_SENDTO SI_POSIX -#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX +#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE (SI_LINUX || SI_FREEBSD) #define SI_STAT_LINUX (SI_LINUX && __GLIBC_PREREQ(2, 33)) #define SANITIZER_INTERCEPT_STAT \ diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h index b119f059007d..43b8a38f39be 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h @@ -726,6 +726,8 @@ struct __sanitizer_cpuset { typedef struct __sanitizer_cpuset __sanitizer_cpuset_t; extern unsigned struct_cpuset_sz; + +typedef unsigned long long __sanitizer_eventfd_t; } // namespace __sanitizer # define CHECK_TYPE_SIZE(TYPE) \ diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index 58244c9944a0..34bfef1f7ef4 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -523,6 +523,7 @@ typedef long __sanitizer_clock_t;
#if SANITIZER_LINUX
typedef int __sanitizer_clockid_t;
+typedef unsigned long long __sanitizer_eventfd_t;
#endif
#if SANITIZER_LINUX
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_bsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_bsd.cpp
index 36a82c4ac966..dcfd94fe3225 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_bsd.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_bsd.cpp
@@ -13,9 +13,6 @@
#include "sanitizer_platform.h"
#if SANITIZER_FREEBSD || SANITIZER_NETBSD
#include "sanitizer_common.h"
-#if SANITIZER_FREEBSD
-#include "sanitizer_freebsd.h"
-#endif
#include "sanitizer_procmaps.h"
// clang-format off
@@ -29,14 +26,6 @@
#include <limits.h>
-// Fix 'kinfo_vmentry' definition on FreeBSD prior v9.2 in 32-bit mode.
-#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
-#include <osreldate.h>
-#if __FreeBSD_version <= 902001 // v9.2
-#define kinfo_vmentry xkinfo_vmentry
-#endif
-#endif
-
namespace __sanitizer {
#if SANITIZER_FREEBSD
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
index 21d57d9ab2a9..279bc5de3bb9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
@@ -161,18 +161,32 @@ StackDepotBase<Node, kReservedBits, kTabSizeLog>::Get(u32 id) {
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::LockBeforeFork() {
- for (int i = 0; i < kTabSize; ++i) {
- lock(&tab[i]);
- }
+ // Do not lock the hash table. It's very expensive, but it's not really
+ // needed. The parent process will neither lock nor unlock. The child
+ // process risks being deadlocked on already locked buckets. To avoid
+ // deadlock we will unlock every locked bucket in `UnlockAfterFork`. This
+ // may affect consistency of the hash table, but the only issue is that a
+ // few items inserted by the parent process will not be found by the child,
+ // and the child may insert them again, wasting some space in `stackStore`.
+
+ // We still need to lock nodes.
+ nodes.Lock();
}
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::UnlockAfterFork(
bool fork_child) {
+ nodes.Unlock();
+
+ // Only unlock in the child process to avoid deadlock. See `LockBeforeFork`.
+ if (!fork_child) + return; + for (int i = 0; i < kTabSize; ++i) { atomic_uint32_t *p = &tab[i]; uptr s = atomic_load(p, memory_order_relaxed); - unlock(p, s & kUnlockMask); + if (s & kLockMask) + unlock(p, s & kUnlockMask); } } diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index 70b9ae09a990..84ff4bfade09 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -622,6 +622,35 @@ struct MappingGoAarch64 { static const uptr kShadowAdd = 0x200000000000ull; }; +/* Go on linux/loongarch64 (47-bit VMA) +0000 0000 1000 - 0000 1000 0000: executable +0000 1000 0000 - 00c0 0000 0000: - +00c0 0000 0000 - 00e0 0000 0000: heap +00e0 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 2800 0000 0000: shadow +2800 0000 0000 - 3000 0000 0000: - +3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects) +3200 0000 0000 - 8000 0000 0000: - +*/ +struct MappingGoLoongArch64_47 { + static const uptr kMetaShadowBeg = 0x300000000000ull; + static const uptr kMetaShadowEnd = 0x320000000000ull; + static const uptr kShadowBeg = 0x200000000000ull; + static const uptr kShadowEnd = 0x280000000000ull; + static const uptr kLoAppMemBeg = 0x000000001000ull; + static const uptr kLoAppMemEnd = 0x00e000000000ull; + static const uptr kMidAppMemBeg = 0; + static const uptr kMidAppMemEnd = 0; + static const uptr kHiAppMemBeg = 0; + static const uptr kHiAppMemEnd = 0; + static const uptr kHeapMemBeg = 0; + static const uptr kHeapMemEnd = 0; + static const uptr kVdsoBeg = 0; + static const uptr kShadowMsk = 0; + static const uptr kShadowXor = 0; + static const uptr kShadowAdd = 0x200000000000ull; +}; + /* Go on linux/mips64 (47-bit VMA) 0000 0000 1000 - 0000 1000 0000: executable @@ -697,6 +726,8 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) { return Func::template Apply<MappingGoS390x>(arg); # elif defined(__aarch64__) return Func::template Apply<MappingGoAarch64>(arg); +# elif defined(__loongarch_lp64) + return Func::template Apply<MappingGoLoongArch64_47>(arg); # elif SANITIZER_WINDOWS return Func::template Apply<MappingGoWindows>(arg); # else @@ -765,6 +796,7 @@ void ForEachMapping() { Func::template Apply<MappingGoPPC64_46>(); Func::template Apply<MappingGoPPC64_47>(); Func::template Apply<MappingGoAarch64>(); + Func::template Apply<MappingGoLoongArch64_47>(); Func::template Apply<MappingGoMips64_47>(); Func::template Apply<MappingGoS390x>(); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 369509ed0a60..b45adea45b27 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -238,7 +238,13 @@ void InitializePlatformEarly() { Printf("FATAL: Found %zd - Supported 47\n", vmaSize); Die(); } -# endif +# else + if (vmaSize != 47) { + Printf("FATAL: ThreadSanitizer: unsupported VMA range\n"); + Printf("FATAL: Found %zd - Supported 47\n", vmaSize); + Die(); + } +# endif #elif defined(__powerpc64__) # if !SANITIZER_GO if (vmaSize != 44 && vmaSize != 46 && vmaSize != 47) { diff --git a/libcxx/include/__config b/libcxx/include/__config index adff13e714cb..40e6da8bc03a 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -200,6 +200,16 @@ # define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION # endif +// We had some bugs where we use [[no_unique_address]] together with construct_at, +// which causes UB as the call on construct_at could write to overlapping subobjects +// +// 
https://github.com/llvm/llvm-project/issues/70506 +// https://github.com/llvm/llvm-project/issues/70494 +// +// To fix the bug we had to change the ABI of some classes to remove [[no_unique_address]] under certain conditions. +// The below macro is used for all classes whose ABI have changed as part of fixing these bugs. +# define _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG __attribute__((__abi_tag__("subobj_fix_2023"))) + // Changes the iterator type of select containers (see below) to a bounded iterator that keeps track of whether it's // within the bounds of the original container and asserts it on every dereference. // diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index a868093026c5..9aa938b22031 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -723,7 +723,9 @@ public: _LIBCPP_HIDE_FROM_ABI long use_count() const _NOEXCEPT { return __cntrl_ ? __cntrl_->use_count() : 0; } - _LIBCPP_HIDE_FROM_ABI bool unique() const _NOEXCEPT { return use_count() == 1; } +#if _LIBCPP_STD_VER < 20 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_SHARED_PTR_UNIQUE) + _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI bool unique() const _NOEXCEPT { return use_count() == 1; } +#endif _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return get() != nullptr; } diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h index 7bf5e3c5e4e6..db473eaa50a6 100644 --- a/libcxx/include/__memory/unique_ptr.h +++ b/libcxx/include/__memory/unique_ptr.h @@ -132,10 +132,6 @@ public: private: __compressed_pair<pointer, deleter_type> __ptr_; - struct __nat { - int __for_bool_; - }; - typedef _LIBCPP_NODEBUG __unique_ptr_deleter_sfinae<_Dp> _DeleterSFINAE; template <bool _Dummy> diff --git a/libcxx/include/__ranges/chunk_by_view.h b/libcxx/include/__ranges/chunk_by_view.h index e46998687646..3ecc018cac9d 100644 --- a/libcxx/include/__ranges/chunk_by_view.h +++ b/libcxx/include/__ranges/chunk_by_view.h @@ -54,7 +54,8 @@ namespace ranges { template <forward_range _View, indirect_binary_predicate<iterator_t<_View>, iterator_t<_View>> _Pred> requires view<_View> && is_object_v<_Pred> -class chunk_by_view : public view_interface<chunk_by_view<_View, _Pred>> { +class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG chunk_by_view + : public view_interface<chunk_by_view<_View, _Pred>> { _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Pred> __pred_; diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h index 2b89c6877a78..83bb598b0a0c 100644 --- a/libcxx/include/__ranges/drop_view.h +++ b/libcxx/include/__ranges/drop_view.h @@ -90,6 +90,10 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr auto begin() requires(!(__simple_view<_View> && random_access_range<const _View> && sized_range<const _View>)) { + if constexpr (random_access_range<_View> && sized_range<_View>) { + const auto __dist = std::min(ranges::distance(__base_), __count_); + return ranges::begin(__base_) + __dist; + } if constexpr (_UseCache) if (__cached_begin_.__has_value()) return *__cached_begin_; @@ -103,7 +107,8 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires random_access_range<const _View> && sized_range<const _View> { - return ranges::next(ranges::begin(__base_), __count_, ranges::end(__base_)); + const auto __dist = std::min(ranges::distance(__base_), __count_); + return ranges::begin(__base_) + __dist; } _LIBCPP_HIDE_FROM_ABI constexpr auto end() 
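In the drop_view hunk above, both begin() overloads now clamp the drop count with std::min and take a single random-access jump instead of calling ranges::next with a sentinel bound; for a random-access, sized range the two spellings yield the same iterator, and the constant-time form lets the non-const overload bypass the cached-begin machinery. A stand-alone C++20 sketch over std::vector rather than the libc++ internals:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <ranges>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3, 4, 5};
  const std::ptrdiff_t count = 3;

  // Pre-patch spelling: advance step-by-step semantics, clamped by the
  // sentinel bound.
  auto old_begin =
      std::ranges::next(std::ranges::begin(v), count, std::ranges::end(v));

  // Post-patch spelling: clamp the offset first, then one O(1) jump.
  auto new_begin =
      std::ranges::begin(v) + std::min(std::ranges::distance(v), count);

  assert(old_begin == new_begin && *new_begin == 4);

  // Dropping more than size() clamps to end() in both spellings.
  assert(std::ranges::begin(v) +
             std::min(std::ranges::distance(v), std::ptrdiff_t{99}) ==
         std::ranges::end(v));
}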
diff --git a/libcxx/include/__ranges/drop_while_view.h b/libcxx/include/__ranges/drop_while_view.h index 677b5bc66d44..eb3783eb42f1 100644 --- a/libcxx/include/__ranges/drop_while_view.h +++ b/libcxx/include/__ranges/drop_while_view.h @@ -45,7 +45,8 @@ namespace ranges { template <view _View, class _Pred> requires input_range<_View> && is_object_v<_Pred> && indirect_unary_predicate<const _Pred, iterator_t<_View>> -class drop_while_view : public view_interface<drop_while_view<_View, _Pred>> { +class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG drop_while_view + : public view_interface<drop_while_view<_View, _Pred>> { public: _LIBCPP_HIDE_FROM_ABI drop_while_view() requires default_initializable<_View> && default_initializable<_Pred> diff --git a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index 08d50ab01104..868ad128e894 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -51,7 +51,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { template <input_range _View, indirect_unary_predicate<iterator_t<_View>> _Pred> requires view<_View> && is_object_v<_Pred> -class filter_view : public view_interface<filter_view<_View, _Pred>> { +class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG filter_view : public view_interface<filter_view<_View, _Pred>> { _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Pred> __pred_; diff --git a/libcxx/include/__ranges/movable_box.h b/libcxx/include/__ranges/movable_box.h index 6615533d3743..9b38877494ea 100644 --- a/libcxx/include/__ranges/movable_box.h +++ b/libcxx/include/__ranges/movable_box.h @@ -134,6 +134,20 @@ concept __doesnt_need_empty_state = // 2. Otherwise, movable-box<T> should store only a T if either T models movable or // is_nothrow_move_constructible_v<T> is true. : movable<_Tp> || is_nothrow_move_constructible_v<_Tp>); + +// When _Tp doesn't have an assignment operator, we must implement __movable_box's assignment operator +// by doing destroy_at followed by construct_at. However, that implementation strategy leads to UB if the nested +// _Tp is potentially overlapping, as it is doing a non-transparent replacement of the sub-object, which means that +// we're not considered "nested" inside the movable-box anymore, and since we're not nested within it, [basic.life]/1.5 +// says that we essentially just reused the storage of the movable-box for a completely unrelated object and ended the +// movable-box's lifetime. +// https://github.com/llvm/llvm-project/issues/70494#issuecomment-1845646490 +// +// Hence, when the _Tp doesn't have an assignment operator, we can't risk making it a potentially-overlapping +// subobject because of the above, and we don't use [[no_unique_address]] in that case. +template <class _Tp> +concept __can_use_no_unique_address = (copy_constructible<_Tp> ? copyable<_Tp> : movable<_Tp>); + # else template <class _Tp> @@ -144,23 +158,45 @@ concept __doesnt_need_empty_state_for_move = movable<_Tp> || is_nothrow_move_con template <class _Tp> concept __doesnt_need_empty_state = __doesnt_need_empty_state_for_copy<_Tp> && __doesnt_need_empty_state_for_move<_Tp>; + +template <class _Tp> +concept __can_use_no_unique_address = copyable<_Tp>; # endif +template <class _Tp> +struct __movable_box_holder { + _Tp __val_; + + template <class... _Args> + _LIBCPP_HIDE_FROM_ABI constexpr explicit __movable_box_holder(in_place_t, _Args&&... __args) + : __val_(std::forward<_Args>(__args)...) 
{} +}; + +template <class _Tp> + requires __can_use_no_unique_address<_Tp> +struct __movable_box_holder<_Tp> { + _LIBCPP_NO_UNIQUE_ADDRESS _Tp __val_; + + template <class... _Args> + _LIBCPP_HIDE_FROM_ABI constexpr explicit __movable_box_holder(in_place_t, _Args&&... __args) + : __val_(std::forward<_Args>(__args)...) {} +}; + template <__movable_box_object _Tp> requires __doesnt_need_empty_state<_Tp> class __movable_box<_Tp> { - _LIBCPP_NO_UNIQUE_ADDRESS _Tp __val_; + _LIBCPP_NO_UNIQUE_ADDRESS __movable_box_holder<_Tp> __holder_; public: template <class... _Args> requires is_constructible_v<_Tp, _Args...> - _LIBCPP_HIDE_FROM_ABI constexpr explicit __movable_box(in_place_t, _Args&&... __args) noexcept( + _LIBCPP_HIDE_FROM_ABI constexpr explicit __movable_box(in_place_t __inplace, _Args&&... __args) noexcept( is_nothrow_constructible_v<_Tp, _Args...>) - : __val_(std::forward<_Args>(__args)...) {} + : __holder_(__inplace, std::forward<_Args>(__args)...) {} _LIBCPP_HIDE_FROM_ABI constexpr __movable_box() noexcept(is_nothrow_default_constructible_v<_Tp>) requires default_initializable<_Tp> - : __val_() {} + : __holder_(in_place_t{}) {} _LIBCPP_HIDE_FROM_ABI __movable_box(__movable_box const&) = default; _LIBCPP_HIDE_FROM_ABI __movable_box(__movable_box&&) = default; @@ -176,27 +212,29 @@ public: // Implementation of assignment operators in case we perform optimization (2) _LIBCPP_HIDE_FROM_ABI constexpr __movable_box& operator=(__movable_box const& __other) noexcept { static_assert(is_nothrow_copy_constructible_v<_Tp>); + static_assert(!__can_use_no_unique_address<_Tp>); if (this != std::addressof(__other)) { - std::destroy_at(std::addressof(__val_)); - std::construct_at(std::addressof(__val_), __other.__val_); + std::destroy_at(std::addressof(__holder_.__val_)); + std::construct_at(std::addressof(__holder_.__val_), __other.__holder_.__val_); } return *this; } _LIBCPP_HIDE_FROM_ABI constexpr __movable_box& operator=(__movable_box&& __other) noexcept { static_assert(is_nothrow_move_constructible_v<_Tp>); + static_assert(!__can_use_no_unique_address<_Tp>); if (this != std::addressof(__other)) { - std::destroy_at(std::addressof(__val_)); - std::construct_at(std::addressof(__val_), std::move(__other.__val_)); + std::destroy_at(std::addressof(__holder_.__val_)); + std::construct_at(std::addressof(__holder_.__val_), std::move(__other.__holder_.__val_)); } return *this; } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp const& operator*() const noexcept { return __val_; } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() noexcept { return __val_; } + _LIBCPP_HIDE_FROM_ABI constexpr _Tp const& operator*() const noexcept { return __holder_.__val_; } + _LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() noexcept { return __holder_.__val_; } - _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* operator->() const noexcept { return std::addressof(__val_); } - _LIBCPP_HIDE_FROM_ABI constexpr _Tp* operator->() noexcept { return std::addressof(__val_); } + _LIBCPP_HIDE_FROM_ABI constexpr const _Tp* operator->() const noexcept { return std::addressof(__holder_.__val_); } + _LIBCPP_HIDE_FROM_ABI constexpr _Tp* operator->() noexcept { return std::addressof(__holder_.__val_); } _LIBCPP_HIDE_FROM_ABI constexpr bool __has_value() const noexcept { return true; } }; diff --git a/libcxx/include/__ranges/repeat_view.h b/libcxx/include/__ranges/repeat_view.h index 459a1e229613..479eca96acb0 100644 --- a/libcxx/include/__ranges/repeat_view.h +++ b/libcxx/include/__ranges/repeat_view.h @@ -68,7 +68,7 @@ struct __fn; template 
<move_constructible _Tp, semiregular _Bound = unreachable_sentinel_t>
requires(is_object_v<_Tp> && same_as<_Tp, remove_cv_t<_Tp>> &&
(__integer_like_with_usable_difference_type<_Bound> ||
same_as<_Bound, unreachable_sentinel_t>))
-class repeat_view : public view_interface<repeat_view<_Tp, _Bound>> {
+class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG repeat_view : public view_interface<repeat_view<_Tp, _Bound>> {
friend struct views::__take::__fn;
friend struct views::__drop::__fn;
class __iterator;
@@ -119,7 +119,7 @@ public:
}
private:
- __movable_box<_Tp> __value_;
+ _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Tp> __value_;
_LIBCPP_NO_UNIQUE_ADDRESS _Bound __bound_ = _Bound();
};
diff --git a/libcxx/include/__ranges/single_view.h b/libcxx/include/__ranges/single_view.h
index b0b2c1d9f3c0..0ae2036a66a9 100644
--- a/libcxx/include/__ranges/single_view.h
+++ b/libcxx/include/__ranges/single_view.h
@@ -37,8 +37,8 @@ template <move_constructible _Tp>
template <copy_constructible _Tp>
# endif
requires is_object_v<_Tp>
-class single_view : public view_interface<single_view<_Tp>> {
- __movable_box<_Tp> __value_;
+class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG single_view : public view_interface<single_view<_Tp>> {
+ _LIBCPP_NO_UNIQUE_ADDRESS __movable_box<_Tp> __value_;
public:
_LIBCPP_HIDE_FROM_ABI single_view()
diff --git a/libcxx/include/__ranges/take_while_view.h b/libcxx/include/__ranges/take_while_view.h
index a6f7f80ca76b..4534210d9794 100644
--- a/libcxx/include/__ranges/take_while_view.h
+++ b/libcxx/include/__ranges/take_while_view.h
@@ -43,7 +43,8 @@ namespace ranges {
template <view _View, class _Pred>
requires input_range<_View> && is_object_v<_Pred> &&
indirect_unary_predicate<const _Pred, iterator_t<_View>>
-class take_while_view : public view_interface<take_while_view<_View, _Pred>> {
+class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG take_while_view
+ : public view_interface<take_while_view<_View, _Pred>> {
template <bool>
class __sentinel;
diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h
index 55c6ce587bd6..744f597ccef5 100644
--- a/libcxx/include/__ranges/transform_view.h
+++ b/libcxx/include/__ranges/transform_view.h
@@ -67,7 +67,8 @@ template <input_range _View, move_constructible _Fn>
template <input_range _View, copy_constructible _Fn>
# endif
requires __transform_view_constraints<_View, _Fn>
-class transform_view : public view_interface<transform_view<_View, _Fn>> {
+class _LIBCPP_ABI_2023_OVERLAPPING_SUBOBJECT_FIX_TAG transform_view
+ : public view_interface<transform_view<_View, _Fn>> {
template <bool>
class __iterator;
template <bool>
diff --git a/libcxx/include/memory b/libcxx/include/memory
index 71e812064646..ee245d5fd2dc 100644
--- a/libcxx/include/memory
+++ b/libcxx/include/memory
@@ -629,7 +629,7 @@ public:
T& operator*() const noexcept;
T* operator->() const noexcept;
long use_count() const noexcept;
- bool unique() const noexcept;
+ bool unique() const noexcept; // deprecated in C++17, removed in C++20
explicit operator bool() const noexcept;
template<class U> bool owner_before(shared_ptr<U> const& b) const noexcept;
template<class U> bool owner_before(weak_ptr<U> const& b) const noexcept;
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 4f11affb35ed..cd2985b035bc 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1951,6 +1951,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
parseMerge(".00cfg=.rdata");
parseMerge(".bss=.data");
+ if (isArm64EC(config->machine))
+ parseMerge(".wowthk=.text"); + if (config->mingw) { parseMerge(".ctors=.rdata"); parseMerge(".dtors=.rdata"); diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 2982165530c0..2e34a6c5cfa2 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -560,7 +560,7 @@ void Writer::createECCodeMap() { codeMap.clear(); std::optional<chpe_range_type> lastType; - Chunk *first = nullptr, *last = nullptr; + Chunk *first, *last; auto closeRange = [&]() { if (lastType) { diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h index b7d0a1228ebf..d397b937d78c 100644 --- a/llvm/include/llvm/ADT/GenericUniformityImpl.h +++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h @@ -33,6 +33,12 @@ /// the propagation of the impact of divergent control flow on the divergence of /// values (sync dependencies). /// +/// NOTE: In general, no interface exists for a transform to update +/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a +/// transitive dependence, but it also does not provide an interface for +/// updating itself. Given that, transforms should not preserve uniformity in +/// their getAnalysisUsage() callback. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H diff --git a/llvm/include/llvm/ADT/SmallString.h b/llvm/include/llvm/ADT/SmallString.h index 02fa28fc856d..a5b9eec50c82 100644 --- a/llvm/include/llvm/ADT/SmallString.h +++ b/llvm/include/llvm/ADT/SmallString.h @@ -89,30 +89,26 @@ public: /// Check for string equality. This is more efficient than compare() when /// the relative ordering of inequal strings isn't needed. - bool equals(StringRef RHS) const { - return str().equals(RHS); - } + [[nodiscard]] bool equals(StringRef RHS) const { return str().equals(RHS); } /// Check for string equality, ignoring case. - bool equals_insensitive(StringRef RHS) const { + [[nodiscard]] bool equals_insensitive(StringRef RHS) const { return str().equals_insensitive(RHS); } /// compare - Compare two strings; the result is negative, zero, or positive /// if this string is lexicographically less than, equal to, or greater than /// the \p RHS. - int compare(StringRef RHS) const { - return str().compare(RHS); - } + [[nodiscard]] int compare(StringRef RHS) const { return str().compare(RHS); } /// compare_insensitive - Compare two strings, ignoring case. - int compare_insensitive(StringRef RHS) const { + [[nodiscard]] int compare_insensitive(StringRef RHS) const { return str().compare_insensitive(RHS); } /// compare_numeric - Compare two strings, treating sequences of digits as /// numbers. - int compare_numeric(StringRef RHS) const { + [[nodiscard]] int compare_numeric(StringRef RHS) const { return str().compare_numeric(RHS); } @@ -121,10 +117,14 @@ public: /// @{ /// starts_with - Check if this string starts with the given \p Prefix. - bool starts_with(StringRef Prefix) const { return str().starts_with(Prefix); } + [[nodiscard]] bool starts_with(StringRef Prefix) const { + return str().starts_with(Prefix); + } /// ends_with - Check if this string ends with the given \p Suffix. - bool ends_with(StringRef Suffix) const { return str().ends_with(Suffix); } + [[nodiscard]] bool ends_with(StringRef Suffix) const { + return str().ends_with(Suffix); + } /// @} /// @name String Searching @@ -134,7 +134,7 @@ public: /// /// \return - The index of the first occurrence of \p C, or npos if not /// found. 
-  size_t find(char C, size_t From = 0) const {
+  [[nodiscard]] size_t find(char C, size_t From = 0) const {
     return str().find(C, From);
   }
@@ -142,7 +142,7 @@ public:
   ///
   /// \returns The index of the first occurrence of \p Str, or npos if not
   /// found.
-  size_t find(StringRef Str, size_t From = 0) const {
+  [[nodiscard]] size_t find(StringRef Str, size_t From = 0) const {
     return str().find(Str, From);
   }
@@ -150,7 +150,7 @@ public:
   ///
   /// \returns The index of the last occurrence of \p C, or npos if not
   /// found.
-  size_t rfind(char C, size_t From = StringRef::npos) const {
+  [[nodiscard]] size_t rfind(char C, size_t From = StringRef::npos) const {
     return str().rfind(C, From);
   }
@@ -158,13 +158,11 @@ public:
   ///
   /// \returns The index of the last occurrence of \p Str, or npos if not
   /// found.
-  size_t rfind(StringRef Str) const {
-    return str().rfind(Str);
-  }
+  [[nodiscard]] size_t rfind(StringRef Str) const { return str().rfind(Str); }
   /// Find the first character in the string that is \p C, or npos if not
   /// found. Same as find.
-  size_t find_first_of(char C, size_t From = 0) const {
+  [[nodiscard]] size_t find_first_of(char C, size_t From = 0) const {
     return str().find_first_of(C, From);
   }
@@ -172,13 +170,13 @@ public:
   /// not found.
   ///
   /// Complexity: O(size() + Chars.size())
-  size_t find_first_of(StringRef Chars, size_t From = 0) const {
+  [[nodiscard]] size_t find_first_of(StringRef Chars, size_t From = 0) const {
     return str().find_first_of(Chars, From);
   }
   /// Find the first character in the string that is not \p C or npos if not
   /// found.
-  size_t find_first_not_of(char C, size_t From = 0) const {
+  [[nodiscard]] size_t find_first_not_of(char C, size_t From = 0) const {
     return str().find_first_not_of(C, From);
   }
@@ -186,13 +184,15 @@ public:
   /// \p Chars, or npos if not found.
   ///
   /// Complexity: O(size() + Chars.size())
-  size_t find_first_not_of(StringRef Chars, size_t From = 0) const {
+  [[nodiscard]] size_t find_first_not_of(StringRef Chars,
+                                         size_t From = 0) const {
     return str().find_first_not_of(Chars, From);
   }
   /// Find the last character in the string that is \p C, or npos if not
   /// found.
-  size_t find_last_of(char C, size_t From = StringRef::npos) const {
+  [[nodiscard]] size_t find_last_of(char C,
+                                    size_t From = StringRef::npos) const {
     return str().find_last_of(C, From);
   }
@@ -200,8 +200,8 @@ public:
   /// found.
   ///
   /// Complexity: O(size() + Chars.size())
-  size_t find_last_of(
-      StringRef Chars, size_t From = StringRef::npos) const {
+  [[nodiscard]] size_t find_last_of(StringRef Chars,
+                                    size_t From = StringRef::npos) const {
     return str().find_last_of(Chars, From);
   }
@@ -210,15 +210,11 @@ public:
   /// @{
   /// Return the number of occurrences of \p C in the string.
-  size_t count(char C) const {
-    return str().count(C);
-  }
+  [[nodiscard]] size_t count(char C) const { return str().count(C); }
   /// Return the number of non-overlapped occurrences of \p Str in the
   /// string.
-  size_t count(StringRef Str) const {
-    return str().count(Str);
-  }
+  [[nodiscard]] size_t count(StringRef Str) const { return str().count(Str); }
   /// @}
   /// @name Substring Operations
   /// @{
@@ -233,7 +229,8 @@ public:
   /// \param N The number of characters to include in the substring. If \p N
   /// exceeds the number of characters remaining in the string, the string
   /// suffix (starting with \p Start) will be returned.
-  StringRef substr(size_t Start, size_t N = StringRef::npos) const {
+  [[nodiscard]] StringRef substr(size_t Start,
+                                 size_t N = StringRef::npos) const {
     return str().substr(Start, N);
   }
@@ -247,14 +244,16 @@ public:
   /// substring. If this is npos, or less than \p Start, or exceeds the
   /// number of characters remaining in the string, the string suffix
   /// (starting with \p Start) will be returned.
-  StringRef slice(size_t Start, size_t End) const {
+  [[nodiscard]] StringRef slice(size_t Start, size_t End) const {
     return str().slice(Start, End);
   }
   // Extra methods.
   /// Explicit conversion to StringRef.
-  StringRef str() const { return StringRef(this->data(), this->size()); }
+  [[nodiscard]] StringRef str() const {
+    return StringRef(this->data(), this->size());
+  }
   // TODO: Make this const, if it's safe...
   const char* c_str() {
diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 5d3bc64bf8b4..7b02b618f7cb 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -54,9 +54,6 @@ class ConstraintSystem {
   /// constraint system.
   DenseMap<Value *, unsigned> Value2Index;
-  /// Current greatest common divisor for all coefficients in the system.
-  uint32_t GCD = 1;
-
   // Eliminate constraints from the system using Fourier–Motzkin elimination.
   bool eliminateUsingFM();
@@ -88,10 +85,6 @@ public:
     for (const auto &[Idx, C] : enumerate(R)) {
       if (C == 0)
         continue;
-      auto A = std::abs(C);
-      GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD})
-                .getZExtValue();
-
       NewRow.emplace_back(C, Idx);
     }
     if (Constraints.empty())
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 735be3680aea..048912beaba5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1243,6 +1243,18 @@ public:
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr) const;
+  /// Returns the cost estimation for an alternating opcode pattern that can
+  /// be lowered to a single instruction on the target. In X86 this is for the
+  /// addsub instruction, which corresponds to a Shuffle + FAdd + FSub pattern
+  /// in IR. This function expects two opcodes, \p Opcode0 and \p Opcode1,
+  /// selected by \p OpcodeMask. The mask contains one bit per lane and is `0`
+  /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
+  /// \p VecTy is the vector type of the instruction to be generated.
+  InstructionCost getAltInstrCost(
+      VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
+      const SmallBitVector &OpcodeMask,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
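(A caller-side sketch of the new hook, illustrative only: `Ctx` and `TTI` are assumed names for an LLVMContext and a TargetTransformInfo obtained elsewhere. It prices an x86-style addsub over <4 x float>, where set mask bits 1 and 3 pick Opcode1 (FAdd) and clear bits 0 and 2 pick Opcode0 (FSub).)

  SmallBitVector OpcodeMask(4, false); // one bit per vector lane
  OpcodeMask[1] = true;                // lane 1 -> Opcode1 (FAdd)
  OpcodeMask[3] = true;                // lane 3 -> Opcode1 (FAdd)
  InstructionCost Cost = TTI.getAltInstrCost(
      FixedVectorType::get(Type::getFloatTy(Ctx), 4),
      Instruction::FSub, Instruction::FAdd, OpcodeMask);

   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
   /// The exact mask may be passed as Mask, or else the array will be empty.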
/// The index and subtype parameters are used by the subvector insertion and @@ -1944,6 +1956,10 @@ public: unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; + virtual InstructionCost getAltInstrCost( + VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0; virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, @@ -2555,6 +2571,12 @@ public: return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, Args, CxtI); } + InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind) const override { + return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); + } InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 1d8f523e9792..7ad3ce512a35 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -554,6 +554,13 @@ public: return 1; } + InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind) const { + return InstructionCost::getInvalid(); + } + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index e7debc652a0a..dcc1a4580b14 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -769,9 +769,6 @@ public: bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); - /// Fold boolean selects to logical operations. - bool matchSelectToLogical(MachineInstr &MI, BuildFnTy &MatchInfo); - bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info); /// Transform G_ADD(x, G_SUB(y, x)) to y. @@ -814,6 +811,9 @@ public: // Given a binop \p MI, commute operands 1 and 2. void applyCommuteBinOpOperands(MachineInstr &MI); + /// Combine selects. + bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo); + private: /// Checks for legality of an indexed variant of \p LdSt. bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; @@ -904,6 +904,18 @@ private: /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0 bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal, Register FalseVal, BuildFnTy &MatchInfo); + + /// Try to fold selects to logical operations. 
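+  /// e.g. select Cond, 1, F --> or Cond, F, and select Cond, T, 0 --> and Cond, T.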
+ bool tryFoldBoolSelectToLogic(GSelect *Select, BuildFnTy &MatchInfo); + + bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo); + + bool isOneOrOneSplat(Register Src, bool AllowUndefs); + bool isZeroOrZeroSplat(Register Src, bool AllowUndefs); + bool isConstantSplatVector(Register Src, int64_t SplatValue, + bool AllowUndefs); + + std::optional<APInt> getConstantOrConstantSplatVector(Register Src); }; } // namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index abbef03d02cb..669104307fa0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2562,6 +2562,13 @@ public: AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); + InsertPointTy createAtomicCompare(const LocationDescription &Loc, + AtomicOpValue &X, AtomicOpValue &V, + AtomicOpValue &R, Value *E, Value *D, + AtomicOrdering AO, + omp::OMPAtomicCompareOp Op, + bool IsXBinopExpr, bool IsPostfixUpdate, + bool IsFailOnly, AtomicOrdering Failure); /// Create the control flow structure of a canonical OpenMP loop. /// diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index cb48f54b13a6..531b11123545 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +def global_ptr_ty : LLVMQualPointerType<1>; + class AMDGPUReadPreloadRegisterIntrinsic : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; @@ -2353,10 +2355,10 @@ def int_amdgcn_s_get_waveid_in_workgroup : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>; -class AMDGPUAtomicRtn<LLVMType vt> : Intrinsic < +class AMDGPUAtomicRtn<LLVMType vt, LLVMType pt = llvm_anyptr_ty> : Intrinsic < [vt], - [llvm_anyptr_ty, // vaddr - vt], // vdata(VGPR) + [pt, // vaddr + vt], // vdata(VGPR) [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>; @@ -2486,6 +2488,8 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var" [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>; +def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUAtomicRtn<llvm_i64_ty, global_ptr_ty>; + def int_amdgcn_flat_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; def int_amdgcn_flat_atomic_fmax_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; def int_amdgcn_global_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index 2de2cf4185d8..84cac3ef700e 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -127,12 +127,20 @@ enum : int32_t { #undef COMPUTE_PGM_RSRC1 // Compute program resource register 2. Must match hardware definition. +// GFX6+. #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH) +// [GFX6-GFX11]. +#define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH) +// GFX12+. 
+#define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH) enum : int32_t { COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1), COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5), - COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1), + COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1), + COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1), COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), @@ -166,23 +174,37 @@ enum : int32_t { // Compute program resource register 3 for GFX10+. Must match hardware // definition. -// [GFX10]. -#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \ - AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH) // GFX10+. #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH) +// [GFX10]. +#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH) +// [GFX10-GFX11]. +#define COMPUTE_PGM_RSRC3_GFX10_GFX11(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_GFX11_##NAME, SHIFT, WIDTH) // GFX11+. #define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH) +// [GFX11]. +#define COMPUTE_PGM_RSRC3_GFX11(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_##NAME, SHIFT, WIDTH) +// GFX12+. +#define COMPUTE_PGM_RSRC3_GFX12_PLUS(NAME, SHIFT, WIDTH) \ + AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX12_PLUS_##NAME, SHIFT, WIDTH) enum : int32_t { - COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), - COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8), - COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6), - COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1), - COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1), - COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19), - COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1), + COMPUTE_PGM_RSRC3_GFX10_GFX11(SHARED_VGPR_COUNT, 0, 4), + COMPUTE_PGM_RSRC3_GFX12_PLUS(RESERVED0, 0, 4), + COMPUTE_PGM_RSRC3_GFX10(RESERVED1, 4, 8), + COMPUTE_PGM_RSRC3_GFX11(INST_PREF_SIZE, 4, 6), + COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_START, 10, 1), + COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_END, 11, 1), + COMPUTE_PGM_RSRC3_GFX12_PLUS(INST_PREF_SIZE, 4, 8), + COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1), + COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1), + COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1), + COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED4, 14, 17), + COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1), COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1), }; #undef COMPUTE_PGM_RSRC3_GFX10_PLUS diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index b0683ac2e32c..3aceb247a26c 100644 --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -70,7 +70,8 @@ enum attributeBits { ATTR_EVEXKZ = 0x1 << 11, ATTR_EVEXB = 0x1 << 12, ATTR_REX2 = 0x1 << 13, - ATTR_max = 0x1 << 14, + ATTR_EVEXNF = 0x1 << 14, + ATTR_max = 0x1 << 15, }; // Combinations of the above attributes that are relevant to instruction @@ -137,12 +138,15 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L_W_XD, 5, "requires VEX, L, W and XD prefix") \ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") \ ENUM_ENTRY(IC_EVEX, 1, "requires an EVEX prefix") \ + 
ENUM_ENTRY(IC_EVEX_NF, 2, "requires EVEX and NF prefix") \ ENUM_ENTRY(IC_EVEX_XS, 2, "requires EVEX and the XS prefix") \ ENUM_ENTRY(IC_EVEX_XD, 2, "requires EVEX and the XD prefix") \ ENUM_ENTRY(IC_EVEX_OPSIZE, 2, "requires EVEX and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_NF, 3, "requires EVEX, NF and the OpSize prefix") \ ENUM_ENTRY(IC_EVEX_OPSIZE_ADSIZE, 3, \ "requires EVEX, OPSIZE and the ADSIZE prefix") \ ENUM_ENTRY(IC_EVEX_W, 3, "requires EVEX and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_NF, 4, "requires EVEX, W and NF prefix") \ ENUM_ENTRY(IC_EVEX_W_XS, 4, "requires EVEX, W, and XS prefix") \ ENUM_ENTRY(IC_EVEX_W_XD, 4, "requires EVEX, W, and XD prefix") \ ENUM_ENTRY(IC_EVEX_W_OPSIZE, 4, "requires EVEX, W, and OpSize") \ @@ -187,10 +191,13 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_XD_K, 4, "requires EVEX_K, L2, W and XD prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K, 4, "requires EVEX_K, L2, W and OpSize") \ ENUM_ENTRY(IC_EVEX_B, 1, "requires an EVEX_B prefix") \ + ENUM_ENTRY(IC_EVEX_B_NF, 2, "requires EVEX_NF and EVEX_B prefix") \ ENUM_ENTRY(IC_EVEX_XS_B, 2, "requires EVEX_B and the XS prefix") \ ENUM_ENTRY(IC_EVEX_XD_B, 2, "requires EVEX_B and the XD prefix") \ ENUM_ENTRY(IC_EVEX_OPSIZE_B, 2, "requires EVEX_B and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_B_NF, 3, "requires EVEX_B, NF and Opsize prefix") \ ENUM_ENTRY(IC_EVEX_W_B, 3, "requires EVEX_B and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_B_NF, 4, "requires EVEX_NF, EVEX_B and the W prefix") \ ENUM_ENTRY(IC_EVEX_W_XS_B, 4, "requires EVEX_B, W, and XS prefix") \ ENUM_ENTRY(IC_EVEX_W_XD_B, 4, "requires EVEX_B, W, and XD prefix") \ ENUM_ENTRY(IC_EVEX_W_OPSIZE_B, 4, "requires EVEX_B, W, and OpSize") \ diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 77db371adaf7..6bda80681432 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -437,13 +437,6 @@ def select_constant_cmp: GICombineRule< (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }]) >; -def select_to_logical : GICombineRule< - (defs root:$root, build_fn_matchinfo:$matchinfo), - (match (wip_match_opcode G_SELECT):$root, - [{ return Helper.matchSelectToLogical(*${root}, ${matchinfo}); }]), - (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }]) ->; - // Fold (C op x) -> (x op C) // TODO: handle more isCommutable opcodes // TODO: handle compares (currently not marked as isCommutable) @@ -1242,6 +1235,12 @@ def select_to_minmax: GICombineRule< [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; +def match_selects : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SELECT):$root, + [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -1282,7 +1281,7 @@ def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend, def phi_combines : GICombineGroup<[extend_through_phis]>; def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp, - select_to_logical]>; + match_selects]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, mul_by_neg_one, idempotent_prop]>; diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp index 8a802515b6f4..35bdd869a88d 100644 --- a/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/llvm/lib/Analysis/ConstraintSystem.cpp @@ -29,7 +29,6 @@ bool ConstraintSystem::eliminateUsingFM() { assert(!Constraints.empty() && "should only be called for non-empty constraint systems"); - uint32_t NewGCD = 1; unsigned LastIdx = NumVariables - 1; // First, either remove the variable in place if it is 0 or add the row to @@ -96,24 +95,20 @@ bool ConstraintSystem::eliminateUsingFM() { IdxUpper++; } - if (MulOverflow(UpperV, ((-1) * LowerLast / GCD), M1)) + if (MulOverflow(UpperV, ((-1) * LowerLast), M1)) return false; if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) { LowerV = LowerRow[IdxLower].Coefficient; IdxLower++; } - if (MulOverflow(LowerV, (UpperLast / GCD), M2)) + if (MulOverflow(LowerV, (UpperLast), M2)) return false; if (AddOverflow(M1, M2, N)) return false; if (N == 0) continue; NR.emplace_back(N, CurrentId); - - NewGCD = - APIntOps::GreatestCommonDivisor({32, (uint32_t)N}, {32, NewGCD}) - .getZExtValue(); } if (NR.empty()) continue; @@ -124,7 +119,6 @@ bool ConstraintSystem::eliminateUsingFM() { } } NumVariables -= 1; - GCD = NewGCD; return true; } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 5beac5547d65..78a833476334 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1189,14 +1189,26 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = simplifyDivRem(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - // If this is an exact divide by a constant, then the dividend (Op0) must have - // at least as many trailing zeros as the divisor to divide evenly. If it has - // less trailing zeros, then the result must be poison. const APInt *DivC; - if (IsExact && match(Op1, m_APInt(DivC)) && DivC->countr_zero()) { - KnownBits KnownOp0 = computeKnownBits(Op0, /* Depth */ 0, Q); - if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero()) - return PoisonValue::get(Op0->getType()); + if (IsExact && match(Op1, m_APInt(DivC))) { + // If this is an exact divide by a constant, then the dividend (Op0) must + // have at least as many trailing zeros as the divisor to divide evenly. If + // it has less trailing zeros, then the result must be poison. + if (DivC->countr_zero()) { + KnownBits KnownOp0 = computeKnownBits(Op0, /* Depth */ 0, Q); + if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero()) + return PoisonValue::get(Op0->getType()); + } + + // udiv exact (mul nsw X, C), C --> X + // sdiv exact (mul nuw X, C), C --> X + // where C is not a power of 2. + Value *X; + if (!DivC->isPowerOf2() && + (Opcode == Instruction::UDiv + ? 
match(Op0, m_NSWMul(m_Value(X), m_Specific(Op1))) + : match(Op0, m_NUWMul(m_Value(X), m_Specific(Op1))))) + return X; } return nullptr; @@ -4857,14 +4869,12 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, // select ?, poison, X -> X // select ?, undef, X -> X if (isa<PoisonValue>(TrueVal) || - (Q.isUndefValue(TrueVal) && - isGuaranteedNotToBePoison(FalseVal, Q.AC, Q.CxtI, Q.DT))) + (Q.isUndefValue(TrueVal) && impliesPoison(FalseVal, Cond))) return FalseVal; // select ?, X, poison -> X // select ?, X, undef -> X if (isa<PoisonValue>(FalseVal) || - (Q.isUndefValue(FalseVal) && - isGuaranteedNotToBePoison(TrueVal, Q.AC, Q.CxtI, Q.DT))) + (Q.isUndefValue(FalseVal) && impliesPoison(TrueVal, Cond))) return TrueVal; // Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC'' diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 89cc7ea15ec1..360fc594ef7c 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -434,6 +434,28 @@ class LazyValueInfoImpl { void solve(); + // For the following methods, if UseBlockValue is true, the function may + // push additional values to the worklist and return nullopt. If + // UseBlockValue is false, it will never return nullopt. + + std::optional<ValueLatticeElement> + getValueFromSimpleICmpCondition(CmpInst::Predicate Pred, Value *RHS, + const APInt &Offset, Instruction *CxtI, + bool UseBlockValue); + + std::optional<ValueLatticeElement> + getValueFromICmpCondition(Value *Val, ICmpInst *ICI, bool isTrueDest, + bool UseBlockValue); + + std::optional<ValueLatticeElement> + getValueFromCondition(Value *Val, Value *Cond, bool IsTrueDest, + bool UseBlockValue, unsigned Depth = 0); + + std::optional<ValueLatticeElement> getEdgeValueLocal(Value *Val, + BasicBlock *BBFrom, + BasicBlock *BBTo, + bool UseBlockValue); + public: /// This is the query interface to determine the lattice value for the /// specified Value* at the context instruction (if specified) or at the @@ -755,14 +777,10 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) { return Result; } -static ValueLatticeElement getValueFromCondition(Value *Val, Value *Cond, - bool isTrueDest = true, - unsigned Depth = 0); - // If we can determine a constraint on the value given conditions assumed by // the program, intersect those constraints with BBLV void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( - Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) { + Value *Val, ValueLatticeElement &BBLV, Instruction *BBI) { BBI = BBI ? 
BBI : dyn_cast<Instruction>(Val); if (!BBI) return; @@ -779,17 +797,21 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange( if (I->getParent() != BB || !isValidAssumeForContext(I, BBI)) continue; - BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0))); + BBLV = intersect(BBLV, *getValueFromCondition(Val, I->getArgOperand(0), + /*IsTrueDest*/ true, + /*UseBlockValue*/ false)); } // If guards are not used in the module, don't spend time looking for them if (GuardDecl && !GuardDecl->use_empty() && BBI->getIterator() != BB->begin()) { - for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()), - BB->rend())) { + for (Instruction &I : + make_range(std::next(BBI->getIterator().getReverse()), BB->rend())) { Value *Cond = nullptr; if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond)))) - BBLV = intersect(BBLV, getValueFromCondition(Val, Cond)); + BBLV = intersect(BBLV, + *getValueFromCondition(Val, Cond, /*IsTrueDest*/ true, + /*UseBlockValue*/ false)); } } @@ -886,10 +908,14 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) { // If the value is undef, a different value may be chosen in // the select condition. if (isGuaranteedNotToBeUndef(Cond, AC)) { - TrueVal = intersect(TrueVal, - getValueFromCondition(SI->getTrueValue(), Cond, true)); - FalseVal = intersect( - FalseVal, getValueFromCondition(SI->getFalseValue(), Cond, false)); + TrueVal = + intersect(TrueVal, *getValueFromCondition(SI->getTrueValue(), Cond, + /*IsTrueDest*/ true, + /*UseBlockValue*/ false)); + FalseVal = + intersect(FalseVal, *getValueFromCondition(SI->getFalseValue(), Cond, + /*IsTrueDest*/ false, + /*UseBlockValue*/ false)); } ValueLatticeElement Result = TrueVal; @@ -950,9 +976,11 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl( // lets us pick up facts from expressions like "and i32 (call i32 // @foo()), 32" std::optional<ConstantRange> LHSRes = getRangeFor(I->getOperand(0), I, BB); + if (!LHSRes) + return std::nullopt; + std::optional<ConstantRange> RHSRes = getRangeFor(I->getOperand(1), I, BB); - if (!LHSRes || !RHSRes) - // More work to do before applying this transfer rule. + if (!RHSRes) return std::nullopt; const ConstantRange &LHSRange = *LHSRes; @@ -1068,15 +1096,26 @@ static bool matchICmpOperand(APInt &Offset, Value *LHS, Value *Val, } /// Get value range for a "(Val + Offset) Pred RHS" condition. 
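/// When \p UseBlockValue is set, the range of \p RHS may be taken from its
/// block value, which can require further solving and yield std::nullopt
/// until that value is available.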
-static ValueLatticeElement getValueFromSimpleICmpCondition( - CmpInst::Predicate Pred, Value *RHS, const APInt &Offset) { +std::optional<ValueLatticeElement> +LazyValueInfoImpl::getValueFromSimpleICmpCondition(CmpInst::Predicate Pred, + Value *RHS, + const APInt &Offset, + Instruction *CxtI, + bool UseBlockValue) { ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(), /*isFullSet=*/true); - if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { RHSRange = ConstantRange(CI->getValue()); - else if (Instruction *I = dyn_cast<Instruction>(RHS)) + } else if (UseBlockValue) { + std::optional<ValueLatticeElement> R = + getBlockValue(RHS, CxtI->getParent(), CxtI); + if (!R) + return std::nullopt; + RHSRange = toConstantRange(*R, RHS->getType()); + } else if (Instruction *I = dyn_cast<Instruction>(RHS)) { if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) RHSRange = getConstantRangeFromMetadata(*Ranges); + } ConstantRange TrueValues = ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); @@ -1103,8 +1142,8 @@ getRangeViaSLT(CmpInst::Predicate Pred, APInt RHS, return std::nullopt; } -static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, - bool isTrueDest) { +std::optional<ValueLatticeElement> LazyValueInfoImpl::getValueFromICmpCondition( + Value *Val, ICmpInst *ICI, bool isTrueDest, bool UseBlockValue) { Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -1128,11 +1167,13 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, unsigned BitWidth = Ty->getScalarSizeInBits(); APInt Offset(BitWidth, 0); if (matchICmpOperand(Offset, LHS, Val, EdgePred)) - return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset); + return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset, ICI, + UseBlockValue); CmpInst::Predicate SwappedPred = CmpInst::getSwappedPredicate(EdgePred); if (matchICmpOperand(Offset, RHS, Val, SwappedPred)) - return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset); + return getValueFromSimpleICmpCondition(SwappedPred, LHS, Offset, ICI, + UseBlockValue); const APInt *Mask, *C; if (match(LHS, m_And(m_Specific(Val), m_APInt(Mask))) && @@ -1212,10 +1253,12 @@ static ValueLatticeElement getValueFromOverflowCondition( return ValueLatticeElement::getRange(NWR); } -static ValueLatticeElement getValueFromCondition( - Value *Val, Value *Cond, bool IsTrueDest, unsigned Depth) { +std::optional<ValueLatticeElement> +LazyValueInfoImpl::getValueFromCondition(Value *Val, Value *Cond, + bool IsTrueDest, bool UseBlockValue, + unsigned Depth) { if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond)) - return getValueFromICmpCondition(Val, ICI, IsTrueDest); + return getValueFromICmpCondition(Val, ICI, IsTrueDest, UseBlockValue); if (auto *EVI = dyn_cast<ExtractValueInst>(Cond)) if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) @@ -1227,7 +1270,7 @@ static ValueLatticeElement getValueFromCondition( Value *N; if (match(Cond, m_Not(m_Value(N)))) - return getValueFromCondition(Val, N, !IsTrueDest, Depth); + return getValueFromCondition(Val, N, !IsTrueDest, UseBlockValue, Depth); Value *L, *R; bool IsAnd; @@ -1238,19 +1281,25 @@ static ValueLatticeElement getValueFromCondition( else return ValueLatticeElement::getOverdefined(); - ValueLatticeElement LV = getValueFromCondition(Val, L, IsTrueDest, Depth); - ValueLatticeElement RV = getValueFromCondition(Val, R, IsTrueDest, Depth); + std::optional<ValueLatticeElement> LV = + getValueFromCondition(Val, L, 
IsTrueDest, UseBlockValue, Depth); + if (!LV) + return std::nullopt; + std::optional<ValueLatticeElement> RV = + getValueFromCondition(Val, R, IsTrueDest, UseBlockValue, Depth); + if (!RV) + return std::nullopt; // if (L && R) -> intersect L and R // if (!(L || R)) -> intersect !L and !R // if (L || R) -> union L and R // if (!(L && R)) -> union !L and !R if (IsTrueDest ^ IsAnd) { - LV.mergeIn(RV); - return LV; + LV->mergeIn(*RV); + return *LV; } - return intersect(LV, RV); + return intersect(*LV, *RV); } // Return true if Usr has Op as an operand, otherwise false. @@ -1302,8 +1351,9 @@ static ValueLatticeElement constantFoldUser(User *Usr, Value *Op, } /// Compute the value of Val on the edge BBFrom -> BBTo. -static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo) { +std::optional<ValueLatticeElement> +LazyValueInfoImpl::getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, bool UseBlockValue) { // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { @@ -1324,13 +1374,16 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // If the condition of the branch is an equality comparison, we may be // able to infer the value. - ValueLatticeElement Result = getValueFromCondition(Val, Condition, - isTrueDest); - if (!Result.isOverdefined()) + std::optional<ValueLatticeElement> Result = + getValueFromCondition(Val, Condition, isTrueDest, UseBlockValue); + if (!Result) + return std::nullopt; + + if (!Result->isOverdefined()) return Result; if (User *Usr = dyn_cast<User>(Val)) { - assert(Result.isOverdefined() && "Result isn't overdefined"); + assert(Result->isOverdefined() && "Result isn't overdefined"); // Check with isOperationFoldable() first to avoid linearly iterating // over the operands unnecessarily which can be expensive for // instructions with many operands. @@ -1356,8 +1409,8 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // br i1 %Condition, label %then, label %else for (unsigned i = 0; i < Usr->getNumOperands(); ++i) { Value *Op = Usr->getOperand(i); - ValueLatticeElement OpLatticeVal = - getValueFromCondition(Op, Condition, isTrueDest); + ValueLatticeElement OpLatticeVal = *getValueFromCondition( + Op, Condition, isTrueDest, /*UseBlockValue*/ false); if (std::optional<APInt> OpConst = OpLatticeVal.asConstantInteger()) { Result = constantFoldUser(Usr, Op, *OpConst, DL); @@ -1367,7 +1420,7 @@ static ValueLatticeElement getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, } } } - if (!Result.isOverdefined()) + if (!Result->isOverdefined()) return Result; } } @@ -1432,8 +1485,12 @@ LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, if (Constant *VC = dyn_cast<Constant>(Val)) return ValueLatticeElement::get(VC); - ValueLatticeElement LocalResult = getEdgeValueLocal(Val, BBFrom, BBTo); - if (hasSingleValue(LocalResult)) + std::optional<ValueLatticeElement> LocalResult = + getEdgeValueLocal(Val, BBFrom, BBTo, /*UseBlockValue*/ true); + if (!LocalResult) + return std::nullopt; + + if (hasSingleValue(*LocalResult)) // Can't get any more precise here return LocalResult; @@ -1453,7 +1510,7 @@ LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, // but then the result is not cached. 
intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI); - return intersect(LocalResult, InBlock); + return intersect(*LocalResult, InBlock); } ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, @@ -1499,10 +1556,12 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, std::optional<ValueLatticeElement> Result = getEdgeValue(V, FromBB, ToBB, CxtI); - if (!Result) { + while (!Result) { + // As the worklist only explicitly tracks block values (but not edge values) + // we may have to call solve() multiple times, as the edge value calculation + // may request additional block values. solve(); Result = getEdgeValue(V, FromBB, ToBB, CxtI); - assert(Result && "More work to do after problem solved?"); } LLVM_DEBUG(dbgs() << " Result = " << *Result << "\n"); @@ -1528,13 +1587,17 @@ ValueLatticeElement LazyValueInfoImpl::getValueAtUse(const Use &U) { if (!isGuaranteedNotToBeUndef(SI->getCondition(), AC)) break; if (CurrU->getOperandNo() == 1) - CondVal = getValueFromCondition(V, SI->getCondition(), true); + CondVal = + *getValueFromCondition(V, SI->getCondition(), /*IsTrueDest*/ true, + /*UseBlockValue*/ false); else if (CurrU->getOperandNo() == 2) - CondVal = getValueFromCondition(V, SI->getCondition(), false); + CondVal = + *getValueFromCondition(V, SI->getCondition(), /*IsTrueDest*/ false, + /*UseBlockValue*/ false); } else if (auto *PHI = dyn_cast<PHINode>(CurrI)) { // TODO: Use non-local query? - CondVal = - getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), PHI->getParent()); + CondVal = *getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), + PHI->getParent(), /*UseBlockValue*/ false); } if (CondVal) VL = intersect(VL, *CondVal); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 3f76dfdaac31..67246afa2314 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -862,6 +862,15 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost( return Cost; } +InstructionCost TargetTransformInfo::getAltInstrCost( + VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, + const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const { + InstructionCost Cost = + TTIImpl->getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + InstructionCost TargetTransformInfo::getShuffleCost( ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index cac2602d455f..16d78c1ded6d 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -983,45 +983,11 @@ static void computeKnownBitsFromOperator(const Operator *I, break; } case Instruction::Select: { - const Value *LHS = nullptr, *RHS = nullptr; - SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; - if (SelectPatternResult::isMinOrMax(SPF)) { - computeKnownBits(RHS, Known, Depth + 1, Q); - computeKnownBits(LHS, Known2, Depth + 1, Q); - switch (SPF) { - default: - llvm_unreachable("Unhandled select pattern flavor!"); - case SPF_SMAX: - Known = KnownBits::smax(Known, Known2); - break; - case SPF_SMIN: - Known = KnownBits::smin(Known, Known2); - break; - case SPF_UMAX: - Known = KnownBits::umax(Known, Known2); - break; - case SPF_UMIN: - Known = KnownBits::umin(Known, Known2); - break; - } - break; - } - 
computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); // Only known if known in both the LHS and RHS. Known = Known.intersectWith(Known2); - - if (SPF == SPF_ABS) { - // RHS from matchSelectPattern returns the negation part of abs pattern. - // If the negate has an NSW flag we can assume the sign bit of the result - // will be 0 because that makes abs(INT_MIN) undefined. - if (match(RHS, m_Neg(m_Specific(LHS))) && - Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RHS))) - Known.Zero.setSignBit(); - } - break; } case Instruction::FPTrunc: diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index f90fca9d937f..5b57f0a25cec 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -123,6 +123,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) { + assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!"); + switch (ID) { case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 8907f6fa4ff3..a027d0c21ba0 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -4218,6 +4218,9 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( // Check whether we have enough values to read a partition name. if (OpNum + 1 < Record.size()) { + // Check Strtab has enough values for the partition. + if (Record[OpNum] + Record[OpNum + 1] > Strtab.size()) + return error("Malformed partition, too large."); NewGA->setPartition( StringRef(Strtab.data() + Record[OpNum], Record[OpNum + 1])); OpNum += 2; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 91a64d59e154..8b15bdb0aca3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5940,62 +5940,6 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( return false; } -bool CombinerHelper::matchSelectToLogical(MachineInstr &MI, - BuildFnTy &MatchInfo) { - GSelect &Sel = cast<GSelect>(MI); - Register DstReg = Sel.getReg(0); - Register Cond = Sel.getCondReg(); - Register TrueReg = Sel.getTrueReg(); - Register FalseReg = Sel.getFalseReg(); - - auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI); - auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI); - - const LLT CondTy = MRI.getType(Cond); - const LLT OpTy = MRI.getType(TrueReg); - if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1) - return false; - - // We have a boolean select. 
- - // select Cond, Cond, F --> or Cond, F - // select Cond, 1, F --> or Cond, F - auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI); - if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildOr(DstReg, Cond, FalseReg); - }; - return true; - } - - // select Cond, T, Cond --> and Cond, T - // select Cond, T, 0 --> and Cond, T - auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI); - if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildAnd(DstReg, Cond, TrueReg); - }; - return true; - } - - // select Cond, T, 1 --> or (not Cond), T - if (MaybeCstFalse && MaybeCstFalse->isOne()) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg); - }; - return true; - } - - // select Cond, 0, F --> and (not Cond), F - if (MaybeCstTrue && MaybeCstTrue->isZero()) { - MatchInfo = [=](MachineIRBuilder &MIB) { - MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg); - }; - return true; - } - return false; -} - bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &IdxToPropagate) { bool PropagateNaN; @@ -6318,3 +6262,300 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) { MI.getOperand(2).setReg(LHSReg); Observer.changedInstr(MI); } + +bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) { + LLT SrcTy = MRI.getType(Src); + if (SrcTy.isFixedVector()) + return isConstantSplatVector(Src, 1, AllowUndefs); + if (SrcTy.isScalar()) { + if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr) + return true; + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + return IConstant && IConstant->Value == 1; + } + return false; // scalable vector +} + +bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) { + LLT SrcTy = MRI.getType(Src); + if (SrcTy.isFixedVector()) + return isConstantSplatVector(Src, 0, AllowUndefs); + if (SrcTy.isScalar()) { + if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr) + return true; + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + return IConstant && IConstant->Value == 0; + } + return false; // scalable vector +} + +// Ignores COPYs during conformance checks. +// FIXME scalable vectors. +bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue, + bool AllowUndefs) { + GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); + if (!BuildVector) + return false; + unsigned NumSources = BuildVector->getNumSources(); + + for (unsigned I = 0; I < NumSources; ++I) { + GImplicitDef *ImplicitDef = + getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI); + if (ImplicitDef && AllowUndefs) + continue; + if (ImplicitDef && !AllowUndefs) + return false; + std::optional<ValueAndVReg> IConstant = + getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); + if (IConstant && IConstant->Value == SplatValue) + continue; + return false; + } + return true; +} + +// Ignores COPYs during lookups. 
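+// Returns the constant value of Src if it is scalar, or the single constant
+// shared by every lane of its G_BUILD_VECTOR; std::nullopt if the lanes
+// disagree or are not all constant.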
+// FIXME scalable vectors +std::optional<APInt> +CombinerHelper::getConstantOrConstantSplatVector(Register Src) { + auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); + if (IConstant) + return IConstant->Value; + + GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); + if (!BuildVector) + return std::nullopt; + unsigned NumSources = BuildVector->getNumSources(); + + std::optional<APInt> Value = std::nullopt; + for (unsigned I = 0; I < NumSources; ++I) { + std::optional<ValueAndVReg> IConstant = + getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI); + if (!IConstant) + return std::nullopt; + if (!Value) + Value = IConstant->Value; + else if (*Value != IConstant->Value) + return std::nullopt; + } + return Value; +} + +// TODO: use knownbits to determine zeros +bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, + BuildFnTy &MatchInfo) { + uint32_t Flags = Select->getFlags(); + Register Dest = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT CondTy = MRI.getType(Select->getCondReg()); + LLT TrueTy = MRI.getType(Select->getTrueReg()); + + // We only do this combine for scalar boolean conditions. + if (CondTy != LLT::scalar(1)) + return false; + + // Both are scalars. + std::optional<ValueAndVReg> TrueOpt = + getIConstantVRegValWithLookThrough(True, MRI); + std::optional<ValueAndVReg> FalseOpt = + getIConstantVRegValWithLookThrough(False, MRI); + + if (!TrueOpt || !FalseOpt) + return false; + + APInt TrueValue = TrueOpt->Value; + APInt FalseValue = FalseOpt->Value; + + // select Cond, 1, 0 --> zext (Cond) + if (TrueValue.isOne() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + B.buildZExtOrTrunc(Dest, Cond); + }; + return true; + } + + // select Cond, -1, 0 --> sext (Cond) + if (TrueValue.isAllOnes() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + B.buildSExtOrTrunc(Dest, Cond); + }; + return true; + } + + // select Cond, 0, 1 --> zext (!Cond) + if (TrueValue.isZero() && FalseValue.isOne()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + B.buildZExtOrTrunc(Dest, Inner); + }; + return true; + } + + // select Cond, 0, -1 --> sext (!Cond) + if (TrueValue.isZero() && FalseValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + B.buildSExtOrTrunc(Dest, Inner); + }; + return true; + } + + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (TrueValue - 1 == FalseValue) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Cond); + B.buildAdd(Dest, Inner, False); + }; + return true; + } + + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (TrueValue + 1 == FalseValue) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Cond); + B.buildAdd(Dest, Inner, False); + }; + return true; + } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (TrueValue.isPowerOf2() && FalseValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); 
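+      // e.g. select Cond, 16, 0 becomes (zext Cond) << 4 here, since
+      // exactLogBase2(16) == 4 and the false arm is known zero.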
+ Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Cond); + // The shift amount must be scalar. + LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy; + auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2()); + B.buildShl(Dest, Inner, ShAmtC, Flags); + }; + return true; + } + // select Cond, -1, C --> or (sext Cond), C + if (TrueValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Cond); + B.buildOr(Dest, Inner, False, Flags); + }; + return true; + } + + // select Cond, C, -1 --> or (sext (not Cond)), C + if (FalseValue.isAllOnes()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Not = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Not, Cond); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildSExtOrTrunc(Inner, Not); + B.buildOr(Dest, Inner, True, Flags); + }; + return true; + } + + return false; +} + +// TODO: use knownbits to determine zeros +bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, + BuildFnTy &MatchInfo) { + uint32_t Flags = Select->getFlags(); + Register DstReg = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT CondTy = MRI.getType(Select->getCondReg()); + LLT TrueTy = MRI.getType(Select->getTrueReg()); + + // Boolean or fixed vector of booleans. + if (CondTy.isScalableVector() || + (CondTy.isFixedVector() && + CondTy.getElementType().getScalarSizeInBits() != 1) || + CondTy.getScalarSizeInBits() != 1) + return false; + + if (CondTy != TrueTy) + return false; + + // select Cond, Cond, F --> or Cond, F + // select Cond, 1, F --> or Cond, F + if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Cond); + B.buildOr(DstReg, Ext, False, Flags); + }; + return true; + } + + // select Cond, T, Cond --> and Cond, T + // select Cond, T, 0 --> and Cond, T + if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Cond); + B.buildAnd(DstReg, Ext, True); + }; + return true; + } + + // select Cond, T, 1 --> or (not Cond), T + if (isOneOrOneSplat(False, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + // First the not. + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + // Then an ext to match the destination register. + Register Ext = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Ext, Inner); + B.buildOr(DstReg, Ext, True, Flags); + }; + return true; + } + + // select Cond, 0, F --> and (not Cond), F + if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + // First the not. + Register Inner = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Inner, Cond); + // Then an ext to match the destination register. 
+      Register Ext = MRI.createGenericVirtualRegister(TrueTy);
+      B.buildZExtOrTrunc(Ext, Inner);
+      B.buildAnd(DstReg, Ext, False);
+    };
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  GSelect *Select = cast<GSelect>(&MI);
+
+  if (tryFoldSelectOfConstants(Select, MatchInfo))
+    return true;
+
+  if (tryFoldBoolSelectToLogic(Select, MatchInfo))
+    return true;
+
+  return false;
+}
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index a032b31a1fc7..51e944d0279f 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -175,8 +175,46 @@ public:
     if (MachineInstr *MI = I->second.MI) {
       std::optional<DestSourcePair> CopyOperands =
           isCopyInstr(*MI, TII, UseCopyInstr);
-      markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
-                          TRI);
+
+      MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
+      MCRegister Src = CopyOperands->Source->getReg().asMCReg();
+
+      markRegsUnavailable(Def, TRI);
+
+      // Since we clobber the destination of a copy, keeping Def in Src's
+      // "DefRegs" no longer reflects reality. We also need to remove the
+      // record from the copy maps that indicates Src defined Def; failing
+      // to do so might cause the target to miss opportunities to eliminate
+      // further redundant copy instructions.
+      // Consider the following sequence during the
+      // ForwardCopyPropagateBlock procedure:
+      // L1: r0 = COPY r9 <- TrackMI
+      // L2: r0 = COPY r8 <- TrackMI (Remove r9 defined r0 from tracker)
+      // L3: use r0 <- Remove L2 from MaybeDeadCopies
+      // L4: early-clobber r9 <- Clobber r9 (L2 is still valid in tracker)
+      // L5: r0 = COPY r8 <- Remove NopCopy
+      for (MCRegUnit SrcUnit : TRI.regunits(Src)) {
+        auto SrcCopy = Copies.find(SrcUnit);
+        if (SrcCopy != Copies.end() && SrcCopy->second.LastSeenUseInCopy) {
+          // If SrcCopy defines multiple values, we only need
+          // to erase the record for Def in DefRegs.
+          for (auto itr = SrcCopy->second.DefRegs.begin();
+               itr != SrcCopy->second.DefRegs.end(); itr++) {
+            if (*itr == Def) {
+              SrcCopy->second.DefRegs.erase(itr);
+              // If DefRegs becomes empty after the removal, we can remove
+              // SrcCopy from the tracker's copy maps. We only remove entries
+              // that solely record that Def is defined by Src; an entry that
+              // also records definitions of other registers must be kept.
+              if (SrcCopy->second.DefRegs.empty() && !SrcCopy->second.MI) {
+                Copies.erase(SrcCopy);
+              }
+              break;
+            }
+          }
+        }
+      }
     }
     // Now we can erase the copy.
Copies.erase(I); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0d46c7868d87..eafa95ce7fcf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -546,6 +546,7 @@ namespace { SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFP_TO_BF16(SDNode *N); + SDValue visitBF16_TO_FP(SDNode *N); SDValue visitVECREDUCE(SDNode *N); SDValue visitVPOp(SDNode *N); SDValue visitGET_FPENV_MEM(SDNode *N); @@ -2047,6 +2048,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); case ISD::FP_TO_BF16: return visitFP_TO_BF16(N); + case ISD::BF16_TO_FP: return visitBF16_TO_FP(N); case ISD::FREEZE: return visitFREEZE(N); case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N); case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N); @@ -26256,14 +26258,17 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { } SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { + auto Op = N->getOpcode(); + assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) && + "opcode should be FP16_TO_FP or BF16_TO_FP."); SDValue N0 = N->getOperand(0); - // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) + // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or + // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op) if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) { ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); if (AndConst && AndConst->getAPIntValue() == 0xffff) { - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), - N0.getOperand(0)); + return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0)); } } @@ -26280,6 +26285,11 @@ SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) { + // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op) + return visitFP16_TO_FP(N); +} + SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a27febe15db8..34fa1f5a7ed1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -495,7 +495,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no // constraints on the %dst register, COPY can target all legal register // classes. - unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned SubIdx = Node->getConstantOperandVal(1); const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); @@ -611,7 +611,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. 
- unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned DstRCIdx = Node->getConstantOperandVal(1); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); Register NewVReg = MRI->createVirtualRegister(DstRC); @@ -629,7 +629,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, void InstrEmitter::EmitRegSequence(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned) { - unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); @@ -1309,8 +1309,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = - cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned Flags = Node->getConstantOperandVal(i); const InlineAsm::Flag F(Flags); const unsigned NumVals = F.getNumOperandRegisters(); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index f73ddfee2b90..e3acb58327a8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -492,8 +492,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, --NumOps; // Ignore the glue operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = - cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned Flags = Node->getConstantOperandVal(i); const InlineAsm::Flag F(Flags); unsigned NumVals = F.getNumOperandRegisters(); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 47c137d2bcad..dcecb2e0e7fa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -331,7 +331,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Opcode = Node->getMachineOpcode(); if (Opcode == TargetOpcode::REG_SEQUENCE) { - unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + unsigned DstRCIdx = Node->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); RegClass = RC->getID(); Cost = RegSequenceCost; @@ -1369,8 +1369,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { --NumOps; // Ignore the glue operand. 
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - unsigned Flags = - cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned Flags = Node->getConstantOperandVal(i); const InlineAsm::Flag F(Flags); unsigned NumVals = F.getNumOperandRegisters(); @@ -2298,8 +2297,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { continue; } if (POpc == TargetOpcode::REG_SEQUENCE) { - unsigned DstRCIdx = - cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue(); + unsigned DstRCIdx = PN->getConstantOperandVal(0); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned RCId = RC->getID(); // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 81facf92e55a..0e17bba2398e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5470,7 +5470,7 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, Ops[i].getOperand(0).getValueType() != VT || (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) || !isa<ConstantSDNode>(Ops[i].getOperand(1)) || - cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i) { + Ops[i].getConstantOperandAPInt(1) != i) { IsIdentity = false; break; } @@ -7408,7 +7408,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { Src.getOperand(0).getOpcode() == ISD::GlobalAddress && Src.getOperand(1).getOpcode() == ISD::Constant) { G = cast<GlobalAddressSDNode>(Src.getOperand(0)); - SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue(); + SrcDelta = Src.getConstantOperandVal(1); } if (!G) return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3dc6e4bbcf46..f28211ac113c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -4181,8 +4181,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; - unsigned iid = - cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + unsigned iid = N->getConstantOperandVal(HasInputChain); if (iid < Intrinsic::num_intrinsics) Msg << "intrinsic %" << Intrinsic::getBaseName((Intrinsic::ID)iid); else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) diff --git a/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp b/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp index f65ec27ff875..5a058bd712a3 100644 --- a/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Debugging/DebugInfoSupport.cpp @@ -105,8 +105,7 @@ llvm::orc::createDWARFContext(LinkGraph &G) { auto SecData = getSectionData(Sec); auto Name = Sec.getName(); // DWARFContext expects the section name to not start with a dot - if (Name.starts_with(".")) - Name = Name.drop_front(); + Name.consume_front("."); LLVM_DEBUG(dbgs() << "Creating DWARFContext section " << Name << " with size " << SecData.size() << "\n"); DWARFSectionData[Name] = diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index a19e17029810..e259c393d07e 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -768,11 +768,11 @@ Error LLJITBuilderState::prepareForConstruction() { // create a default one. 
if (!SetupProcessSymbolsJITDylib && LinkProcessSymbolsByDefault) { LLVM_DEBUG(dbgs() << "Creating default Process JD setup function\n"); - SetupProcessSymbolsJITDylib = [this](LLJIT &J) -> Expected<JITDylibSP> { + SetupProcessSymbolsJITDylib = [](LLJIT &J) -> Expected<JITDylibSP> { auto &JD = J.getExecutionSession().createBareJITDylib("<Process Symbols>"); - auto G = orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( - DL->getGlobalPrefix()); + auto G = EPCDynamicLibrarySearchGenerator::GetForTargetProcess( + J.getExecutionSession()); if (!G) return G.takeError(); JD.addGenerator(std::move(*G)); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ce428f78dc84..f6cf358119fb 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6026,6 +6026,17 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly) { + AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO); + return createAtomicCompare(Loc, X, V, R, E, D, AO, Op, IsXBinopExpr, + IsPostfixUpdate, IsFailOnly, Failure); +} + +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( + const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, + AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, + omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, + bool IsFailOnly, AtomicOrdering Failure) { + if (!updateToLocation(Loc)) return Loc.IP; @@ -6040,7 +6051,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( bool IsInteger = E->getType()->isIntegerTy(); if (Op == OMPAtomicCompareOp::EQ) { - AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO); AtomicCmpXchgInst *Result = nullptr; if (!IsInteger) { IntegerType *IntCastTy = diff --git a/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/llvm/lib/FuzzMutate/FuzzerCLI.cpp index c64e9c04e199..58e4b74f4b22 100644 --- a/llvm/lib/FuzzMutate/FuzzerCLI.cpp +++ b/llvm/lib/FuzzMutate/FuzzerCLI.cpp @@ -86,13 +86,12 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) { Args.push_back("-passes=gvn"); } else if (Opt == "sccp") { Args.push_back("-passes=sccp"); - } else if (Opt == "loop_predication") { Args.push_back("-passes=loop-predication"); } else if (Opt == "guard_widening") { Args.push_back("-passes=guard-widening"); } else if (Opt == "loop_rotate") { - Args.push_back("-passes=loop(rotate)"); + Args.push_back("-passes=loop-rotate"); } else if (Opt == "loop_unswitch") { Args.push_back("-passes=loop(simple-loop-unswitch)"); } else if (Opt == "loop_unroll") { @@ -107,7 +106,18 @@ void llvm::handleExecNameEncodedOptimizerOpts(StringRef ExecName) { Args.push_back("-passes=loop-reduce"); } else if (Opt == "irce") { Args.push_back("-passes=irce"); - + } else if (Opt == "dse") { + Args.push_back("-passes=dse"); + } else if (Opt == "loop_idiom") { + Args.push_back("-passes=loop-idiom"); + } else if (Opt == "reassociate") { + Args.push_back("-passes=reassociate"); + } else if (Opt == "lower_matrix_intrinsics") { + Args.push_back("-passes=lower-matrix-intrinsics"); + } else if (Opt == "memcpyopt") { + Args.push_back("-passes=memcpyopt"); + } else if (Opt == "sroa") { + Args.push_back("-passes=sroa"); } else if (Triple(Opt).getArch()) { Args.push_back("-mtriple=" + Opt.str()); } else { diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index eab05eed428e..c6dc42e8ac88 100644 --- 
a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -2115,6 +2115,10 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) { if (F.hasFnAttribute(Attribute::OptimizeNone)) return /*Changed*/ false; + // FIXME: https://github.com/llvm/llvm-project/issues/76545 + if (F.hasFnAttribute(Attribute::SanitizeHWAddress)) + return /*Changed*/ false; + bool Changed = false; auto *DL = &F.getParent()->getDataLayout(); // Collect a map of {backing storage : dbg.declares} (currently "backing diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index fd48d5080ff6..e43f111113b4 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1526,8 +1526,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, StringRef Name = SectionName; // For user-defined custom sections, strip the prefix - if (Name.starts_with(".custom_section.")) - Name = Name.substr(strlen(".custom_section.")); + Name.consume_front(".custom_section."); MCSymbol *Begin = Sec.getBeginSymbol(); if (Begin) { diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index dfe86a45df32..ccc29d0cb73d 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -1484,6 +1484,11 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) { } uint32_t BodySize = FunctionEnd - Ctx.Ptr; + // Ensure that Function is within Ctx's buffer. + if (Ctx.Ptr + BodySize > Ctx.End) { + return make_error<GenericBinaryError>("Function extends beyond buffer", + object_error::parse_failed); + } Function.Body = ArrayRef<uint8_t>(Ctx.Ptr, BodySize); // This will be set later when reading in the linking metadata section. Function.Comdat = UINT32_MAX; @@ -1662,10 +1667,18 @@ Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const { Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { auto &Sym = getWasmSymbol(Symb); if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION && - isDefinedFunctionIndex(Sym.Info.ElementIndex)) - return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset; - else - return getSymbolValue(Symb); + isDefinedFunctionIndex(Sym.Info.ElementIndex)) { + // For object files, use the section offset. The linker relies on this. + // For linked files, use the file offset. This behavior matches the way + // browsers print stack traces and is useful for binary size analysis. + // (see https://webassembly.github.io/spec/web-api/index.html#conventions) + uint32_t Adjustment = isRelocatableObject() || isSharedObject() + ? 
0 + : Sections[CodeSection].Offset; + return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset + + Adjustment; + } + return getSymbolValue(Symb); } uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const { diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 8f62df79d5b7..b547cf7181b1 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -539,7 +539,7 @@ Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { const IntPtrT FPtr = swap(I->FunctionPointer); if (!FPtr) continue; - Symtab.mapAddress(FPtr, I->NameRef); + Symtab.mapAddress(FPtr, swap(I->NameRef)); } return success(); } diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 7256e9a29329..a9b7e209915a 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -75,7 +75,6 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"xcvmac", RISCVExtensionVersion{1, 0}}, {"xcvmem", RISCVExtensionVersion{1, 0}}, {"xcvsimd", RISCVExtensionVersion{1, 0}}, - {"xsfcie", RISCVExtensionVersion{1, 0}}, {"xsfvcp", RISCVExtensionVersion{1, 0}}, {"xsfvfnrclipxfqf", RISCVExtensionVersion{1, 0}}, {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}}, @@ -191,11 +190,17 @@ static const RISCVSupportedExtension SupportedExtensions[] = { static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zacas", RISCVExtensionVersion{1, 0}}, + {"zcmop", RISCVExtensionVersion{0, 2}}, + {"zfbfmin", RISCVExtensionVersion{0, 8}}, {"zicfilp", RISCVExtensionVersion{0, 4}}, + {"zicfiss", RISCVExtensionVersion{0, 4}}, + {"zicond", RISCVExtensionVersion{1, 0}}, + {"zimop", RISCVExtensionVersion{0, 1}}, + {"ztso", RISCVExtensionVersion{0, 1}}, {"zvfbfmin", RISCVExtensionVersion{0, 8}}, @@ -1006,6 +1011,7 @@ static const char *ImpliedExtsZcb[] = {"zca"}; static const char *ImpliedExtsZcd[] = {"d", "zca"}; static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"}; static const char *ImpliedExtsZcf[] = {"f", "zca"}; +static const char *ImpliedExtsZcmop[] = {"zca"}; static const char *ImpliedExtsZcmp[] = {"zca"}; static const char *ImpliedExtsZcmt[] = {"zca", "zicsr"}; static const char *ImpliedExtsZdinx[] = {"zfinx"}; @@ -1017,6 +1023,7 @@ static const char *ImpliedExtsZfinx[] = {"zicsr"}; static const char *ImpliedExtsZhinx[] = {"zhinxmin"}; static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; static const char *ImpliedExtsZicntr[] = {"zicsr"}; +static const char *ImpliedExtsZicfiss[] = {"zicsr", "zimop"}; static const char *ImpliedExtsZihpm[] = {"zicsr"}; static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", @@ -1078,6 +1085,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zcd"}, {ImpliedExtsZcd}}, {{"zce"}, {ImpliedExtsZce}}, {{"zcf"}, {ImpliedExtsZcf}}, + {{"zcmop"}, {ImpliedExtsZcmop}}, {{"zcmp"}, {ImpliedExtsZcmp}}, {{"zcmt"}, {ImpliedExtsZcmt}}, {{"zdinx"}, {ImpliedExtsZdinx}}, @@ -1088,6 +1096,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zfinx"}, {ImpliedExtsZfinx}}, {{"zhinx"}, {ImpliedExtsZhinx}}, {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, + {{"zicfiss"}, {ImpliedExtsZicfiss}}, {{"zicntr"}, {ImpliedExtsZicntr}}, {{"zihpm"}, {ImpliedExtsZihpm}}, {{"zk"}, {ImpliedExtsZk}}, diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 168a63bb2d96..2bf68b7972e7 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ 
b/llvm/lib/Support/Windows/Path.inc @@ -154,7 +154,10 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) { return ""; llvm::sys::path::make_preferred(PathNameUTF8); - return std::string(PathNameUTF8.data()); + + SmallString<256> RealPath; + sys::fs::real_path(PathNameUTF8, RealPath); + return std::string(RealPath); } UniqueID file_status::getUniqueID() const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 463ec41b94e9..476d99c2a7e0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1950,7 +1950,7 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, unsigned Op) { unsigned TileNum = 0; if (BaseReg != AArch64::ZA) - TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + TileNum = N->getConstantOperandVal(2); if (!SelectSMETile(BaseReg, TileNum)) return; @@ -2145,8 +2145,7 @@ void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, const EVT ResTys[] = {MVT::Untyped, MVT::Other}; - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); + unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 3), N->getOperand(0)}; @@ -2185,8 +2184,7 @@ void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, const EVT ResTys[] = {MVT::i64, // Type of the write back register RegSeq->getValueType(0), MVT::Other}; - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); + unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, @@ -2237,8 +2235,7 @@ void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, SDValue RegSeq = createQTuple(Regs); - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); + unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 3), N->getOperand(0)}; @@ -2269,8 +2266,7 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, const EVT ResTys[] = {MVT::i64, // Type of the write back register MVT::Other}; - unsigned LaneNo = - cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); + unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1); SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), N->getOperand(NumVecs + 2), // Base Register @@ -2576,8 +2572,8 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, case AArch64::UBFMXri: Opc = NOpc; Opd0 = N->getOperand(0); - Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + Immr = N->getConstantOperandVal(1); + Imms = N->getConstantOperandVal(2); return true; } // Unreachable @@ -3877,7 +3873,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { assert(isa<ConstantSDNode>(N->getOperand(2)) && "Expected a constant integer expression."); unsigned Reg = PMapper->Encoding; - uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + uint64_t Immed = N->getConstantOperandVal(2); CurDAG->SelectNodeTo( N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); @@ -4173,8 
+4169,7 @@ bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { SDValue IRG_SP = N->getOperand(2); if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || - cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != - Intrinsic::aarch64_irg_sp) { + IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) { return false; } @@ -4183,7 +4178,7 @@ bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); SDValue FiOp = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); - int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + int TagOffset = N->getConstantOperandVal(3); SDNode *Out = CurDAG->getMachineNode( AArch64::TAGPstack, DL, MVT::i64, @@ -4203,7 +4198,7 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { // General case for unrelated pointers in Op1 and Op2. SDLoc DL(N); - int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + int TagOffset = N->getConstantOperandVal(3); SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, {N->getOperand(1), N->getOperand(2)}); SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, @@ -4219,7 +4214,7 @@ bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) { assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!"); // Bail when not a "cast" like insert_subvector. - if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0) + if (N->getConstantOperandVal(2) != 0) return false; if (!N->getOperand(0).isUndef()) return false; @@ -4250,7 +4245,7 @@ bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) { assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!"); // Bail when not a "cast" like extract_subvector. - if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0) + if (N->getConstantOperandVal(1) != 0) return false; // Bail when normal isel can do the job. 
@@ -4422,7 +4417,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { return; } case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { default: break; @@ -5179,7 +5174,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } } break; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { default: break; @@ -5782,7 +5777,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { break; } case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); if (Node->getNumOperands() >= 3) VT = Node->getOperand(2)->getValueType(0); switch (IntNo) { @@ -6806,7 +6801,7 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN) return EVT(); - switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) { + switch (Root->getConstantOperandVal(1)) { default: return EVT(); case Intrinsic::aarch64_sme_ldr: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dffe69bdb900..102fd0c3dae2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2196,7 +2196,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( } case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); switch (IntNo) { default: break; @@ -3922,9 +3922,9 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // 4: bool isDataCache static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); - unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + unsigned IsWrite = Op.getConstantOperandVal(2); + unsigned Locality = Op.getConstantOperandVal(3); + unsigned IsData = Op.getConstantOperandVal(4); bool IsStream = !Locality; // When the locality number is set @@ -4973,10 +4973,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(2); - unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned Locality = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - unsigned IsStream = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); - unsigned IsData = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue(); + unsigned IsWrite = Op.getConstantOperandVal(3); + unsigned Locality = Op.getConstantOperandVal(4); + unsigned IsStream = Op.getConstantOperandVal(5); + unsigned IsData = Op.getConstantOperandVal(6); unsigned PrfOp = (IsWrite << 4) | // Load/Store bit (!IsData << 3) | // IsDataCache bit (Locality << 1) | // Cache level bits @@ -5039,7 +5039,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // 
Don't custom lower most intrinsics. @@ -5218,8 +5218,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::aarch64_sve_ptrue: - return getPTrue(DAG, dl, Op.getValueType(), - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); + return getPTrue(DAG, dl, Op.getValueType(), Op.getConstantOperandVal(1)); case Intrinsic::aarch64_sve_clz: return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); @@ -6478,7 +6477,7 @@ static unsigned getIntrinsicID(const SDNode *N) { default: return Intrinsic::not_intrinsic; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); if (IID < Intrinsic::num_intrinsics) return IID; return Intrinsic::not_intrinsic; @@ -10009,7 +10008,7 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64); while (Depth--) @@ -10076,7 +10075,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDValue ReturnAddress; if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -10942,7 +10941,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); // Update the minimum and maximum lane number seen. 
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); + unsigned EltNo = V.getConstantOperandVal(1); Source->MinElt = std::min(Source->MinElt, EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo); } @@ -13329,7 +13328,7 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, "Only cases that extract a fixed length vector are supported!"); EVT InVT = Op.getOperand(0).getValueType(); - unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Idx = Op.getConstantOperandVal(1); unsigned Size = Op.getValueSizeInBits(); // If we don't have legal types yet, do nothing @@ -13375,7 +13374,7 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, "Only expect to lower inserts into scalable vectors!"); EVT InVT = Op.getOperand(1).getValueType(); - unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Idx = Op.getConstantOperandVal(2); SDValue Vec0 = Op.getOperand(0); SDValue Vec1 = Op.getOperand(1); @@ -13715,11 +13714,10 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs); - bool IsSplatUniform = - SrcVT.getVectorElementType().getSizeInBits() >= SplatBitSize; - bool IsZero = IsCnst && SplatValue == 0 && IsSplatUniform; - bool IsOne = IsCnst && SplatValue == 1 && IsSplatUniform; - bool IsMinusOne = IsCnst && SplatValue.isAllOnes() && IsSplatUniform; + bool IsZero = IsCnst && SplatValue == 0; + bool IsOne = + IsCnst && SrcVT.getScalarSizeInBits() == SplatBitSize && SplatValue == 1; + bool IsMinusOne = IsCnst && SplatValue.isAllOnes(); if (SrcVT.getVectorElementType().isFloatingPoint()) { switch (CC) { @@ -14247,7 +14245,7 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, assert(VT != MVT::i64 && "Expected illegal VSCALE node"); SDLoc DL(Op); - APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue(); + APInt MulImm = Op.getConstantOperandAPInt(0); return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL, VT); } @@ -18343,7 +18341,7 @@ static bool isEssentiallyExtractHighSubvector(SDValue N) { return false; if (N.getOperand(0).getValueType().isScalableVector()) return false; - return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() == + return N.getConstantOperandAPInt(1) == N.getOperand(0).getValueType().getVectorNumElements() / 2; } @@ -18399,8 +18397,8 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) { // TODO: we want the operands of the Cmp not the csel SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3); SetCCInfo.IsAArch64 = true; - SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + SetCCInfo.Info.AArch64.CC = + static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2)); // Check that the operands matches the constraints: // (1) Both operands must be constants. 
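The EmitVectorComparison change in the hunk above is the one non-mechanical edit in this file: the old splat-width guard (IsSplatUniform) is dropped for the zero and all-ones cases and kept, as an exact-width check, only for the splat of one. A minimal standalone illustration of why zero and all-ones are safe at any reported splat width (plain C++ with made-up constants, not LLVM API; isConstantSplat can report, e.g., a v4i16 splat fused to SplatBitSize == 32):

#include <cassert>
#include <cstdint>

int main() {
  // A splat reported at 32 bits for a vector whose elements are 16 bits.
  uint32_t Zeros = 0x00000000, Ones = 0xFFFFFFFF, One = 0x00000001;
  // All-zeros and all-ones survive re-slicing into 16-bit lanes...
  assert((uint16_t)Zeros == 0x0000 && (uint16_t)(Zeros >> 16) == 0x0000);
  assert((uint16_t)Ones == 0xFFFF && (uint16_t)(Ones >> 16) == 0xFFFF);
  // ...but a wide 1 does not: its lanes are {1, 0}, so IsOne still needs
  // the SplatBitSize == getScalarSizeInBits() check.
  assert((uint16_t)One == 0x0001 && (uint16_t)(One >> 16) == 0x0000);
  return 0;
}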
@@ -21585,7 +21583,7 @@ static SDValue performNEONPostLDSTCombine(SDNode *N, bool IsDupOp = false; unsigned NewOpc = 0; unsigned NumVecs = 0; - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(1); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post; @@ -22501,7 +22499,7 @@ static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { - unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned Bit = N->getConstantOperandVal(2); bool Invert = false; SDValue TestSrc = N->getOperand(1); SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG); @@ -23789,7 +23787,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performMULLCombine(N, DCI, DAG); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { case Intrinsic::aarch64_sve_prfb_gather_scalar_offset: return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfh_gather_scalar_offset: @@ -23940,8 +23938,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED); case Intrinsic::aarch64_rndr: case Intrinsic::aarch64_rndrrs: { - unsigned IntrinsicID = - cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntrinsicID = N->getConstantOperandVal(1); auto Register = (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR : AArch64SysReg::RNDRRS); diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index e3220d103ae0..a21b4b77166e 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -896,7 +896,7 @@ static DecodeStatus DecodePCRelLabel16(MCInst &Inst, unsigned Imm, // Immediate is encoded as the top 16-bits of an unsigned 18-bit negative // PC-relative offset. 
uint64_t ImmVal = Imm; - if (ImmVal < 0 || ImmVal > (1 << 16)) + if (ImmVal > (1 << 16)) return Fail; ImmVal = -ImmVal; if (!Decoder->tryAddingSymbolicOperand(Inst, (ImmVal << 2), Addr, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 8b909f53c844..1d0e8be80d07 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -623,6 +623,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .legalFor({s32, s64}) .legalFor(PackedVectorAllTypeList) .maxScalar(0, s64) + .clampNumElements(0, v8s8, v16s8) + .clampNumElements(0, v4s16, v8s16) + .clampNumElements(0, v2s32, v4s32) + .clampMaxNumElements(0, s64, 2) .lower(); // FP conversions @@ -1406,7 +1410,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::aarch64_neon_umax: case Intrinsic::aarch64_neon_umin: case Intrinsic::aarch64_neon_fmax: - case Intrinsic::aarch64_neon_fmin: { + case Intrinsic::aarch64_neon_fmin: + case Intrinsic::aarch64_neon_fmaxnm: + case Intrinsic::aarch64_neon_fminnm: { MachineIRBuilder MIB(MI); if (IntrinsicID == Intrinsic::aarch64_neon_smax) MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); @@ -1422,6 +1428,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, else if (IntrinsicID == Intrinsic::aarch64_neon_fmin) MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)}, {MI.getOperand(2), MI.getOperand(3)}); + else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm) + MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3)}); + else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm) + MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3)}); MI.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b0eac567ec9f..bffea82ab8f4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -377,7 +377,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, return Subtarget->getRegisterInfo()->getRegClass(RegClass); } case AMDGPU::REG_SEQUENCE: { - unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned RCID = N->getConstantOperandVal(0); const TargetRegisterClass *SuperRC = Subtarget->getRegisterInfo()->getRegClass(RCID); @@ -724,7 +724,7 @@ bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const { assert(N->getOpcode() == ISD::AND); - const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + const APInt &RHS = N->getConstantOperandAPInt(1); if (RHS.countr_one() >= ShAmtBits) return true; @@ -2672,7 +2672,7 @@ void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) { } void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(1); switch (IntrID) { case Intrinsic::amdgcn_ds_append: case Intrinsic::amdgcn_ds_consume: { @@ -2690,7 +2690,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { } void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(0); unsigned Opcode; switch (IntrID) { 
case Intrinsic::amdgcn_wqm: @@ -2731,7 +2731,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { } void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(1); switch (IntrID) { case Intrinsic::amdgcn_ds_gws_init: case Intrinsic::amdgcn_ds_gws_barrier: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 541a5b62450d..8fbc90a6db9f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -682,7 +682,7 @@ static bool hasSourceMods(const SDNode *N) { case ISD::BITCAST: return false; case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) { + switch (N->getConstantOperandVal(0)) { case Intrinsic::amdgcn_interp_p1: case Intrinsic::amdgcn_interp_p2: case Intrinsic::amdgcn_interp_mov: @@ -837,7 +837,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { case ISD::TokenFactor: return true; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntrID = N->getConstantOperandVal(0); switch (IntrID) { case Intrinsic::amdgcn_readfirstlane: case Intrinsic::amdgcn_readlane: @@ -1489,7 +1489,7 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SmallVector<SDValue, 8> Args; - unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Start = Op.getConstantOperandVal(1); EVT VT = Op.getValueType(); EVT SrcVT = Op.getOperand(0).getValueType(); @@ -2502,8 +2502,7 @@ static bool valueIsKnownNeverF32Denorm(SDValue Src) { case ISD::FFREXP: return true; case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID = - cast<ConstantSDNode>(Src.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Src.getConstantOperandVal(0); switch (IntrinsicID) { case Intrinsic::amdgcn_frexp_mant: return true; @@ -3601,7 +3600,7 @@ static SDValue simplifyMul24(SDNode *Node24, SDValue RHS = IsIntrin ? 
Node24->getOperand(2) : Node24->getOperand(1); unsigned NewOpcode = Node24->getOpcode(); if (IsIntrin) { - unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue(); + unsigned IID = Node24->getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_mul_i24: NewOpcode = AMDGPUISD::MUL_I24; @@ -3821,7 +3820,7 @@ SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( SDNode *N, DAGCombinerInfo &DCI) const { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_mul_i24: case Intrinsic::amdgcn_mul_u24: @@ -5652,7 +5651,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( break; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IID = Op.getConstantOperandVal(0); switch (IID) { case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::amdgcn_workitem_id_y: @@ -5834,8 +5833,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, return SNaN; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID - = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(0); // TODO: Handle more intrinsics switch (IntrinsicID) { case Intrinsic::amdgcn_cubeid: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index eaf72d7157ee..36e07d944c94 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -642,6 +642,7 @@ defm int_amdgcn_global_atomic_fmax : noret_op; defm int_amdgcn_global_atomic_csub : noret_op; defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op; defm int_amdgcn_ds_fadd_v2bf16 : noret_op; +defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op; defm int_amdgcn_flat_atomic_fmin_num : noret_op; defm int_amdgcn_flat_atomic_fmax_num : noret_op; defm int_amdgcn_global_atomic_fmin_num : noret_op; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index c9412f720c62..fba060464a6e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4690,6 +4690,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_flat_atomic_fmax_num: case Intrinsic::amdgcn_global_atomic_fadd_v2bf16: case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: + case Intrinsic::amdgcn_global_atomic_ordered_add_b64: return getDefaultMappingAllVGPR(MI); case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_swap: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td index e83e644d13f3..2d8dc9d47225 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -11,7 +11,7 @@ def SGPRRegBank : RegisterBank<"SGPR", >; def VGPRRegBank : RegisterBank<"VGPR", - [VGPR_LO16, VGPR_HI16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024] + [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024] >; // It is helpful to distinguish conditions from ordinary SGPRs. 
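A recurring cleanup across the SelectionDAG and target hunks above (InstrEmitter, the schedulers, the AArch64/AMDGPU/R600 lowerings) swaps the verbose cast-based idiom for the SDNode accessor that wraps it. A minimal sketch of the equivalence; the helper name readOperand is made up for illustration, but SDNode::getConstantOperandVal and getConstantOperandAPInt are the real accessors, implemented as exactly the pattern being replaced:

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cassert>
using namespace llvm;

// Both forms require operand 1 to be a ConstantSDNode (cast<> asserts this)
// and read its value zero-extended to 64 bits.
static uint64_t readOperand(SDNode *N) {
  uint64_t Old = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  uint64_t New = N->getConstantOperandVal(1);
  assert(Old == New && "the two spellings are interchangeable");
  return New;
}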
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp index db5d2bbcf5bb..fc47b02c98e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -346,8 +346,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage( IsSGPR = true; Width = 1; } else if (AMDGPU::VGPR_32RegClass.contains(Reg) || - AMDGPU::VGPR_LO16RegClass.contains(Reg) || - AMDGPU::VGPR_HI16RegClass.contains(Reg)) { + AMDGPU::VGPR_16RegClass.contains(Reg)) { IsSGPR = false; Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg) || diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp index 459400e3359c..79e9312034da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp @@ -85,7 +85,6 @@ public: AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<UniformityInfoWrapperPass>(); AU.setPreservesCFG(); } }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index beb670669581..4cc8871a00fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -243,6 +243,7 @@ def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmin_num>; def : SourceOfDivergence<int_amdgcn_global_atomic_fmax_num>; +def : SourceOfDivergence<int_amdgcn_global_atomic_ordered_add_b64>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>; def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 9bc3ba161c9e..1bfb7c0edd80 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -109,9 +109,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const { // FIXME: preserve PostDominatorTreeWrapperPass } - // No divergent values are changed, only blocks and branch edges. 
- AU.addPreserved<UniformityInfoWrapperPass>(); - // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 3b69a37728ea..abd7e911beef 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5416,11 +5416,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, ValRange); } else if (ID == ".amdhsa_shared_vgpr_count") { - if (IVersion.Major < 10) - return Error(IDRange.Start, "directive requires gfx10+", IDRange); + if (IVersion.Major < 10 || IVersion.Major >= 12) + return Error(IDRange.Start, "directive requires gfx10 or gfx11", + IDRange); SharedVGPRCount = Val; PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, - COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( @@ -5522,7 +5523,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { (AccumOffset / 4 - 1)); } - if (IVersion.Major >= 10) { + if (IVersion.Major >= 10 && IVersion.Major < 12) { // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { return TokError("shared_vgpr_count directive not valid on " diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 7939d0036568..67be7b0fd642 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1284,9 +1284,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx, bool IsHi) const { - unsigned RCID = - IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID; - return createRegOperand(RCID, RegIdx); + unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0); + return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16); } // Decode Literals for insts which always have a literal in the encoding @@ -2000,34 +1999,60 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3( if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1) return MCDisassembler::Fail; } else if (isGFX10Plus()) { - if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) { - PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count", - COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + // Bits [0-3]. + if (!isGFX12Plus()) { + if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) { + PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count", + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); + } else { + PRINT_PSEUDO_DIRECTIVE_COMMENT( + "SHARED_VGPR_COUNT", + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); + } } else { - PRINT_PSEUDO_DIRECTIVE_COMMENT( - "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); + if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0) + return MCDisassembler::Fail; } - if (isGFX11Plus()) { + // Bits [4-11]. 
+ if (isGFX11()) {
 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
+ COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
+ COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
+ } else if (isGFX12Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
+ } else {
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED1)
+ return MCDisassembler::Fail;
+ }
+
+ // Bits [12].
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2)
+ return MCDisassembler::Fail;
+
+ // Bits [13].
+ if (isGFX12Plus()) {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
+ COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
 } else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3)
 return MCDisassembler::Fail;
 }
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
+ // Bits [14-30].
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4)
 return MCDisassembler::Fail;
+ // Bits [31].
 if (isGFX11Plus()) {
 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
- COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+ COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
 } else {
- if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED5)
 return MCDisassembler::Fail;
 }
 } else if (FourByteBuffer) {
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 0dd2b3f5c2c9..615f8cd54d8f 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -926,9 +926,11 @@ defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_usho
 defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
 defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
-} // End is_flat_global = 1
-
+let SubtargetPredicate = isGFX12Plus in {
+ defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;
+} // End SubtargetPredicate = isGFX12Plus
+} // End is_flat_global = 1
 let SubtargetPredicate = HasFlatScratchInsts in {
 defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
@@ -1529,6 +1531,10 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>
 defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
 defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>;
+let OtherPredicates = [isGFX12Plus] in {
+ defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>;
+}
+
 let OtherPredicates = [isGFX10Plus] in {
 defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
 defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
@@ -2654,6 +2660,7 @@ defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_A
 defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">;
 defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">;
 defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
+defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073, "GLOBAL_ATOMIC_ORDERED_ADD_B64", "global_atomic_ordered_add_b64">;
 // ENC_VSCRATCH.
 defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index a855cf585205..e135a4e25dd1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -475,8 +475,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
 PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, compute_pgm_rsrc1,
 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
+ }
+ if (IVersion.Major >= 10 && IVersion.Major < 12) {
 PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
- amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
 }
 if (IVersion.Major >= 12)
 PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1,
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index c1ba9c514874..9a2fb0bc37b2 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -424,8 +424,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
 return lowerADDRSPACECAST(Op, DAG);
 case ISD::INTRINSIC_VOID: {
 SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(1);
 switch (IntrinsicID) {
 case Intrinsic::r600_store_swizzle: {
 SDLoc DL(Op);
@@ -449,8 +448,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
 break;
 }
 case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID =
- cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
 EVT VT = Op.getValueType();
 SDLoc DL(Op);
 switch (IntrinsicID) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index fc119aa61d01..0e857e6ac71b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1240,6 +1240,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 case Intrinsic::amdgcn_global_atomic_fmax:
 case Intrinsic::amdgcn_global_atomic_fmin_num:
 case Intrinsic::amdgcn_global_atomic_fmax_num:
+ case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
 case Intrinsic::amdgcn_flat_atomic_fadd:
 case Intrinsic::amdgcn_flat_atomic_fmin:
 case Intrinsic::amdgcn_flat_atomic_fmax:
@@ -5304,7 +5305,7 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
 VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
- VT == MVT::v32f32);
+ VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16);
 SDValue Lo0, Hi0;
 SDValue Op0 = Op.getOperand(0);
@@ -5388,7 +5389,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 return SDValue();
 // Get the rounding mode from the last operand
- int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int RoundMode = Op.getConstantOperandVal(1);
 if (RoundMode == (int)RoundingMode::TowardPositive)
 Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
 else if (RoundMode == (int)RoundingMode::TowardNegative)
@@ -5698,7 +5699,7 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
 return;
 }
 case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
 switch (IID) {
 case Intrinsic::amdgcn_make_buffer_rsrc:
 Results.push_back(lowerPointerAsRsrcIntrin(N, DAG));
@@ -5836,7 +5837,7 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
 unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
 if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
- switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
+ switch (Intr->getConstantOperandVal(1)) {
 case Intrinsic::amdgcn_if:
 return AMDGPUISD::IF;
 case Intrinsic::amdgcn_else:
@@ -5985,7 +5986,7 @@ SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
 MVT VT = Op.getSimpleValueType();
 SDLoc DL(Op);
 // Checking the depth
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
+ if (Op.getConstantOperandVal(0) != 0)
 return DAG.getConstant(0, DL, VT);
 MachineFunction &MF = DAG.getMachineFunction();
@@ -7634,7 +7635,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 EVT VT = Op.getValueType();
 SDLoc DL(Op);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
 // TODO: Should this propagate fast-math-flags?
@@ -7788,7 +7789,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
 SDLoc(Op), MVT::i32);
 case Intrinsic::amdgcn_s_buffer_load: {
- unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned CPol = Op.getConstantOperandVal(3);
 if (CPol & ~((Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12)
 ? AMDGPU::CPol::ALL
 : AMDGPU::CPol::ALL_pregfx12))
@@ -8038,7 +8039,7 @@ SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
 SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 SelectionDAG &DAG) const {
- unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntrID = Op.getConstantOperandVal(1);
 SDLoc DL(Op);
 switch (IntrID) {
@@ -8134,8 +8135,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 }
 case Intrinsic::amdgcn_buffer_load:
 case Intrinsic::amdgcn_buffer_load_format: {
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned Glc = Op.getConstantOperandVal(5);
+ unsigned Slc = Op.getConstantOperandVal(6);
 unsigned IdxEn = getIdxEn(Op.getOperand(3));
 SDValue Ops[] = {
 Op.getOperand(0), // Chain
@@ -8223,10 +8224,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 EVT LoadVT = Op.getValueType();
 auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
- unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
- unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned Dfmt = Op.getConstantOperandVal(7);
+ unsigned Nfmt = Op.getConstantOperandVal(8);
+ unsigned Glc = Op.getConstantOperandVal(9);
+ unsigned Slc = Op.getConstantOperandVal(10);
 unsigned IdxEn = getIdxEn(Op.getOperand(3));
 SDValue Ops[] = {
 Op.getOperand(0), // Chain
@@ -8313,7 +8314,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 case Intrinsic::amdgcn_buffer_atomic_or:
 case Intrinsic::amdgcn_buffer_atomic_xor:
 case Intrinsic::amdgcn_buffer_atomic_fadd: {
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned Slc = Op.getConstantOperandVal(6);
 unsigned IdxEn = getIdxEn(Op.getOperand(4));
 SDValue Ops[] = {
 Op.getOperand(0), // Chain
@@ -8474,7 +8475,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 return lowerStructBufferAtomicIntrin(Op, DAG,
 AMDGPUISD::BUFFER_ATOMIC_DEC);
 case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Slc = Op.getConstantOperandVal(7);
 unsigned IdxEn = getIdxEn(Op.getOperand(5));
 SDValue Ops[] = {
 Op.getOperand(0), // Chain
@@ -8878,7 +8879,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
 SelectionDAG &DAG) const {
 SDLoc DL(Op);
 SDValue Chain = Op.getOperand(0);
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(1);
 MachineFunction &MF = DAG.getMachineFunction();
 switch (IntrinsicID) {
@@ -8943,10 +8944,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
 bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
 if (IsD16)
 VData = handleD16VData(VData, DAG);
- unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
- unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
+ unsigned Dfmt = Op.getConstantOperandVal(8);
+ unsigned Nfmt = Op.getConstantOperandVal(9);
+ unsigned Glc = Op.getConstantOperandVal(10);
+ unsigned Slc = Op.getConstantOperandVal(11);
 unsigned IdxEn = getIdxEn(Op.getOperand(4));
 SDValue Ops[] = {
 Chain,
@@ -9029,8 +9030,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
 bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
 if (IsD16)
 VData = handleD16VData(VData, DAG);
- unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
- unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Glc = Op.getConstantOperandVal(6);
+ unsigned Slc = Op.getConstantOperandVal(7);
 unsigned IdxEn = getIdxEn(Op.getOperand(4));
 SDValue Ops[] = {
 Chain,
@@ -12069,8 +12070,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
 return false;
 }
 case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntrinsicID
- = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntrinsicID = Op.getConstantOperandVal(0);
 // TODO: Handle more intrinsics
 switch (IntrinsicID) {
 case Intrinsic::amdgcn_cvt_pkrtz:
@@ -15008,7 +15008,7 @@ void SITargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 unsigned Opc = Op.getOpcode();
 switch (Opc) {
 case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IID = Op.getConstantOperandVal(0);
 switch (IID) {
 case Intrinsic::amdgcn_mbcnt_lo:
 case Intrinsic::amdgcn_mbcnt_hi: {
@@ -15251,11 +15251,9 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
 case ISD::CALLSEQ_END:
 return true;
 case ISD::INTRINSIC_WO_CHAIN:
- return AMDGPU::isIntrinsicSourceOfDivergence(
- cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+ return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(0));
 case ISD::INTRINSIC_W_CHAIN:
- return AMDGPU::isIntrinsicSourceOfDivergence(
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(1));
 case AMDGPUISD::ATOMIC_CMP_SWAP:
 case AMDGPUISD::ATOMIC_LOAD_FMIN:
 case AMDGPUISD::ATOMIC_LOAD_FMAX:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ebe23a5eac57..396d22c7ec18 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -273,8 +273,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
 // subtract the index by one.
 Offset0Idx -= get(Opc0).NumDefs;
 Offset1Idx -= get(Opc1).NumDefs;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
+ Offset0 = Load0->getConstantOperandVal(Offset0Idx);
+ Offset1 = Load1->getConstantOperandVal(Offset1Idx);
 return true;
 }
@@ -955,12 +955,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
- bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
- AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
- AMDGPU::AGPR_LO16RegClass.contains(DestReg);
- bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
- AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
- AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
+ bool DstLow = !AMDGPU::isHi(DestReg, RI);
+ bool SrcLow = !AMDGPU::isHi(SrcReg, RI);
 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
@@ -7202,6 +7198,18 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
 Register DstReg = Inst.getOperand(0).getReg();
 const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
+ // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and
+ // hope for the best.
+ if (Inst.isCopy() && DstReg.isPhysical() &&
+ RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
+ // TODO: Only works for 32 bit registers.
+ BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+ get(AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand(0).getReg())
+ .add(Inst.getOperand(1));
+ Inst.eraseFromParent();
+ return;
+ }
+
 if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
 NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
 // Instead of creating a copy where src and dst are the same register
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f9bc623abcd0..8310c6b57dad 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1487,8 +1487,18 @@ foreach Index = 0-31 in {
 // 16-bit bitcast
 def : BitConvert <i16, f16, VGPR_32>;
 def : BitConvert <f16, i16, VGPR_32>;
+def : BitConvert <f16, bf16, VGPR_32>;
+def : BitConvert <bf16, f16, VGPR_32>;
+
 def : BitConvert <i16, f16, SReg_32>;
 def : BitConvert <f16, i16, SReg_32>;
+def : BitConvert <f16, bf16, SReg_32>;
+def : BitConvert <bf16, f16, SReg_32>;
+
+def : BitConvert <i16, bf16, VGPR_32>;
+def : BitConvert <bf16, i16, VGPR_32>;
+def : BitConvert <i16, bf16, SReg_32>;
+def : BitConvert <bf16, i16, SReg_32>;
 // 32-bit bitcast
 def : BitConvert <i32, f32, VGPR_32>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 021d797344c5..a93cf5cad411 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -330,8 +330,10 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
 RegPressureIgnoredUnits.resize(getNumRegUnits());
 RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
- for (auto Reg : AMDGPU::VGPR_HI16RegClass)
- RegPressureIgnoredUnits.set(*regunits(Reg).begin());
+ for (auto Reg : AMDGPU::VGPR_16RegClass) {
+ if (AMDGPU::isHi(Reg, *this))
+ RegPressureIgnoredUnits.set(*regunits(Reg).begin());
+ }
 // HACK: Until this is fully tablegen'd.
 static llvm::once_flag InitializeRegSplitPartsFlag;
@@ -2661,7 +2663,7 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
 if (BitWidth == 1)
 return &AMDGPU::VReg_1RegClass;
 if (BitWidth == 16)
- return &AMDGPU::VGPR_LO16RegClass;
+ return &AMDGPU::VGPR_16RegClass;
 if (BitWidth == 32)
 return &AMDGPU::VGPR_32RegClass;
 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
@@ -2808,8 +2810,6 @@ getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
 const TargetRegisterClass *
 SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
- if (BitWidth == 16)
- return &AMDGPU::VGPR_LO16RegClass;
 if (BitWidth == 32)
 return &AMDGPU::AV_32RegClass;
 return ST.needsAlignedVGPRs()
@@ -3041,8 +3041,6 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 default:
 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
 case AMDGPU::VGPR_32RegClassID:
- case AMDGPU::VGPR_LO16RegClassID:
- case AMDGPU::VGPR_HI16RegClassID:
 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
 case AMDGPU::SGPR_32RegClassID:
 case AMDGPU::SGPR_LO16RegClassID:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 981da13fe089..c94b894c5841 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -376,7 +376,7 @@ def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
 let HasSGPR = 1;
 }
-def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
+def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> {
 let CopyCost = 1;
 let Size = 16;
 let isAllocatable = 0;
@@ -385,7 +385,7 @@ def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
 // TODO: Do we need to set DwarfRegAlias on register tuples?
-def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
 (add (sequence "SGPR%u_LO16", 0, 105))> {
 let AllocationPriority = 0;
 let Size = 16;
 let HasSGPR = 1;
 }
@@ -393,7 +393,7 @@ def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
-def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
 (add (sequence "SGPR%u_HI16", 0, 105))> {
 let isAllocatable = 0;
 let Size = 16;
@@ -402,7 +402,7 @@ }
 // SGPR 32-bit registers
-def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
 (add (sequence "SGPR%u", 0, 105))> {
 // Give all SGPR classes higher priority than VGPR classes, because
 // we want to spill SGPRs to VGPRs.
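
An aside on the SIRegisterInfo.td edits above and below: they all do one thing, adding bf16 (and its vector forms v2bf16, v4bf16, and so on) to the value-type lists of register classes that already carried the same-sized f16/i16 types. A register class only becomes usable for a type once that type is listed, even when the bit width already matches. A toy check of that premise, illustrative only; the header path assumes LLVM 18, where MachineValueType.h lives under llvm/CodeGen rather than llvm/Support:

#include "llvm/CodeGen/MachineValueType.h"

// bf16 and f16 occupy the same 16 bits but are distinct MVTs, so listing
// f16 in a register class does not make bf16 values assignable to it.
static bool bf16IsItsOwn16BitType() {
  llvm::MVT BF = llvm::MVT::bf16;
  llvm::MVT HF = llvm::MVT::f16;
  return BF != HF && BF.getFixedSizeInBits() == 16 &&
         HF.getFixedSizeInBits() == 16;
}

The matching BitConvert patterns in SIInstructions.td (earlier in this patch) are what let isel treat those same-width bitcasts as free register reinterpretations.
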
@@ -451,14 +451,14 @@ def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s"
 def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;
 // Trap handler TMP 32-bit registers
-def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
+def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v2bf16], 32,
 (add (sequence "TTMP%u", 0, 15))> {
 let isAllocatable = 0;
 let HasSGPR = 1;
 }
 // Trap handler TMP 16-bit registers
-def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
 (add (sequence "TTMP%u_LO16", 0, 15))> {
 let Size = 16;
 let isAllocatable = 0;
@@ -584,24 +584,10 @@ class RegisterTypes<list<ValueType> reg_types> {
 list<ValueType> types = reg_types;
 }
-def Reg16Types : RegisterTypes<[i16, f16]>;
-def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
+def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
+def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
 let HasVGPR = 1 in {
-def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add (sequence "VGPR%u_LO16", 0, 255))> {
- let AllocationPriority = 0;
- let Size = 16;
- let GeneratePressureSet = 0;
-}
-
-def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add (sequence "VGPR%u_HI16", 0, 255))> {
- let AllocationPriority = 0;
- let Size = 16;
- let GeneratePressureSet = 0;
-}
-
 // VOP3 and VINTERP can access 256 lo and 256 hi registers.
 def VGPR_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
 (add (interleave (sequence "VGPR%u_LO16", 0, 255),
@@ -697,7 +683,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
 }
 // AccVGPR 32-bit registers
-def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
 (add (sequence "AGPR%u", 0, 255))> {
 let AllocationPriority = 0;
 let Size = 32;
@@ -749,7 +735,7 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
 // Register classes used as source and destination
 //===----------------------------------------------------------------------===//
-def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
 (add FP_REG, SP_REG)> {
 let isAllocatable = 0;
 let CopyCost = -1;
@@ -757,7 +743,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
 let BaseClassOrder = 10000;
 }
-def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
+def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16, v8bf16], 32,
 (add PRIVATE_RSRC_REG)> {
 let isAllocatable = 0;
 let CopyCost = -1;
@@ -774,7 +760,7 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
 let GeneratePressureSet = 0, HasSGPR = 1 in {
 // Subset of SReg_32 without M0 for SMRD instructions and alike.
 // See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
 (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO,
 XNACK_MASK_HI, SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO,
 TBA_HI, SRC_SHARED_BASE_LO, SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO,
 SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
@@ -783,7 +769,7 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2
 let AllocationPriority = 0;
 }
-def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
 (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16,
 FLAT_SCR_HI_LO16, XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16,
 SGPR_NULL_HI_LO16, TTMP_LO16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16,
 TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
@@ -796,17 +782,17 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
 let BaseClassOrder = 16;
 }
-def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
 (add SReg_32_XM0_XEXEC, M0_CLASS)> {
 let AllocationPriority = 0;
 }
-def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
 (add SReg_32_XEXEC, EXEC_LO)> {
 let AllocationPriority = 0;
 }
-def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
 (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
 let AllocationPriority = 0;
 }
@@ -814,7 +800,7 @@ def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i
 } // End GeneratePressureSet = 0
 // Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
 (add SReg_32_XM0, M0_CLASS)> {
 let AllocationPriority = 0;
 let HasSGPR = 1;
 }
 let GeneratePressureSet = 0 in {
-def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
 (add SReg_32, LDS_DIRECT_CLASS)> {
 let isAllocatable = 0;
 let HasSGPR = 1;
 }
-def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
+def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16, v4bf16], 32,
 (add SGPR_64Regs)> {
 let CopyCost = 1;
 let AllocationPriority = 1;
@@ -850,13 +836,13 @@ def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
 let HasSGPR = 1;
 }
-def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
+def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16], 32,
 (add TTMP_64Regs)> {
 let isAllocatable = 0;
 let HasSGPR = 1;
 }
-def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
 (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK,
 SGPR_NULL64, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE,
 SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> {
 let CopyCost = 1;
@@ -864,7 +850,7 @@ def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16
 let HasSGPR = 1;
 }
-def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32,
 (add SReg_64_XEXEC, EXEC)> {
 let CopyCost = 1;
 let AllocationPriority = 1;
@@ -919,11 +905,11 @@ multiclass SRegClass<int numRegs,
 }
 defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
-defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], SGPR_128Regs, TTMP_128Regs>;
 defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
 defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
 defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
-defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
+defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs>;
 defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
 defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
 defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
@@ -934,7 +920,7 @@ defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512
 defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
 }
-def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
 (add VGPR_32, LDS_DIRECT_CLASS)> {
 let isAllocatable = 0;
 let HasVGPR = 1;
 }
@@ -969,15 +955,15 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
 }
 }
-defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
+defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4bf16, v4i16, p0, p1, p4],
 (add VGPR_64)>;
 defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
-defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
+defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add VGPR_128)>;
 defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
 defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
 defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;
-defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;
+defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], (add VGPR_256)>;
 defm VReg_288 : VRegClass<9, [v9i32, v9f32], (add VGPR_288)>;
 defm VReg_320 : VRegClass<10, [v10i32, v10f32], (add VGPR_320)>;
 defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
@@ -1007,7 +993,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
 defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
 (add AGPR_64)>;
 defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
-defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
+defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add AGPR_128)>;
 defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
 defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
 defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
@@ -1046,14 +1032,14 @@ def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
 let HasVGPR = 1;
 }
-def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
 (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
 let isAllocatable = 0;
 let HasVGPR = 1;
 let HasSGPR = 1;
 }
-def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
 (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
 let isAllocatable = 0;
 let HasVGPR = 1;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 0f92a56237ac..a91d77175234 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2296,8 +2296,6 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 // (move from MC* level to Target* level). Return size in bits.
 unsigned getRegBitWidth(unsigned RCID) {
 switch (RCID) {
- case AMDGPU::VGPR_LO16RegClassID:
- case AMDGPU::VGPR_HI16RegClassID:
 case AMDGPU::SGPR_LO16RegClassID:
 case AMDGPU::AGPR_LO16RegClassID:
 return 16;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index d3cefb339d9e..7f52501b5d90 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -190,9 +190,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
 // because dealing with the write to high half of the register is
 // difficult.
 def : GCNPat <
- (build_vector f16:$elt0, (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
+ (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
 (v2f16 (mixhi_inst $src0_modifiers, $src0,
 $src1_modifiers, $src1,
 $src2_modifiers, $src2,
@@ -203,9 +203,9 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
 def : GCNPat <
 (build_vector
 f16:$elt0,
- (AMDGPUclamp (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+ (AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
 (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))),
 (v2f16 (mixhi_inst $src0_modifiers, $src0,
 $src1_modifiers, $src1,
 $src2_modifiers, $src2,
@@ -215,12 +215,12 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
 def : GCNPat <
 (AMDGPUclamp (build_vector
- (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
+ (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
 (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
- (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers))))),
+ (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
 (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
- (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
+ (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers))))))),
 (v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
 $hi_src1_modifiers, $hi_src1,
 $hi_src2_modifiers, $hi_src2,
@@ -243,8 +243,8 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
 >;
 def : GCNPat <
- (build_vector f16:$elt0, (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
- (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))),
+ (build_vector f16:$elt0, (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers)))))),
 (v2f16 (mixhi_inst $src0_modifiers, $src0,
 $src1_modifiers, $src1,
 (i32 0), (i32 0),
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp
index 5d9a366f5ed5..2265f5db6737 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -751,7 +751,7 @@ SDValue ARCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
 EVT VT = Op.getValueType();
 SDLoc dl(Op);
- assert(cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0 &&
+ assert(Op.getConstantOperandVal(0) == 0 &&
 "Only support lowering frame addr of current frame.");
 Register FrameReg = ARI.getFrameRegister(MF);
 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index a0776296b8eb..ef02dc997011 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4499,8 +4499,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 default: break;
 case ARM::LDRrs:
 case ARM::LDRBrs: {
- unsigned ShOpVal =
- cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShOpVal = DefNode->getConstantOperandVal(2);
 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
 if (ShImm == 0 ||
 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
@@ -4512,8 +4511,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 case ARM::t2LDRHs:
 case ARM::t2LDRSHs: {
 // Thumb2 mode: lsl only.
- unsigned ShAmt =
- cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShAmt = DefNode->getConstantOperandVal(2);
 if (ShAmt == 0 || ShAmt == 2)
 Latency = *Latency - 1;
 break;
@@ -4526,8 +4524,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 default: break;
 case ARM::LDRrs:
 case ARM::LDRBrs: {
- unsigned ShOpVal =
- cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShOpVal = DefNode->getConstantOperandVal(2);
 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
 if (ShImm == 0 ||
 ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 984d8d3e0b08..adc429b61bbc 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2422,8 +2422,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
 SDValue Chain = N->getOperand(0);
- unsigned Lane =
- cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
 EVT VT = N->getOperand(Vec0Idx).getValueType();
 bool is64BitVector = VT.is64BitVector();
@@ -2587,7 +2586,7 @@ void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
 Ops.push_back(N->getOperand(2)); // vector of base addresses
- int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int32_t ImmValue = N->getConstantOperandVal(3);
 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
 if (Predicated)
@@ -2622,7 +2621,7 @@ void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
 // The shift count
 if (Immediate) {
- int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int32_t ImmValue = N->getConstantOperandVal(3);
 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
 } else {
 Ops.push_back(N->getOperand(3));
@@ -2630,7 +2629,7 @@ void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
 // The immediate saturation operand, if any
 if (HasSaturationOperand) {
- int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
+ int32_t SatOp = N->getConstantOperandVal(4);
 int SatBit = (SatOp == 64 ? 0 : 1);
 Ops.push_back(getI32Imm(SatBit, Loc));
 }
@@ -2685,7 +2684,7 @@ void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
 // and then an immediate shift count
 Ops.push_back(N->getOperand(1));
 Ops.push_back(N->getOperand(2));
- int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ int32_t ImmValue = N->getConstantOperandVal(3);
 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
 if (Predicated)
@@ -4138,14 +4137,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
 if (InGlue.getOpcode() == ARMISD::CMPZ) {
 if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
 SDValue Int = InGlue.getOperand(0);
- uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+ uint64_t ID = Int->getConstantOperandVal(1);
 // Handle low-overhead loops.
 if (ID == Intrinsic::loop_decrement_reg) {
 SDValue Elements = Int.getOperand(2);
- SDValue Size = CurDAG->getTargetConstant(
- cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
- MVT::i32);
+ SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
+ dl, MVT::i32);
 SDValue Args[] = { Elements, Size, Int.getOperand(0) };
 SDNode *LoopDec =
@@ -4715,7 +4713,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
 case ISD::INTRINSIC_VOID:
 case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
 switch (IntNo) {
 default:
 break;
@@ -4732,9 +4730,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
 SmallVector<SDValue, 5> Ops;
- Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
- Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
- Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
+ Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
+ Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
+ Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */
 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
 // instruction will always be '1111' but it is possible in assembly language to specify
@@ -5181,7 +5179,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
 }
 case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(0);
 switch (IntNo) {
 default:
 break;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d00b7853816e..9f3bcffc7a99 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4110,7 +4110,7 @@ SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
 SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
 const ARMSubtarget *Subtarget) const {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
 SDLoc dl(Op);
 switch (IntNo) {
 default: return SDValue(); // Don't custom lower most intrinsics.
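
Most of the C++ churn in this import, visible above and in the target files that follow, is one mechanical rewrite: constant operands are read through the getConstantOperandVal accessor on SDNode/SDValue instead of an explicit cast to ConstantSDNode. A minimal sketch of the two equivalent spellings, illustrative only (Op stands for any SDValue whose operand 0 is a constant):

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cassert>
using namespace llvm;

static unsigned readIntrinsicID(SDValue Op) {
  // Old spelling: cast the operand to ConstantSDNode by hand, then read it.
  unsigned Before = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  // New spelling: the accessor performs the same cast (and assert) internally.
  unsigned After = Op.getConstantOperandVal(0);
  assert(Before == After && "both spellings read the same constant");
  (void)Before;
  return After;
}

The change is behavior-preserving; it only removes boilerplate at each call site.
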
@@ -4289,13 +4289,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
 return Op.getOperand(0);
 SDLoc dl(Op);
- unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
 if (!isRead &&
 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
 // ARMv7 with MP extension has PLDW.
 return Op.getOperand(0);
- unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ unsigned isData = Op.getConstantOperandVal(4);
 if (Subtarget->isThumb()) {
 // Invert the bits.
 isRead = ~isRead & 1;
@@ -4800,7 +4800,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
 !isSignedIntSetCC(CC)) {
- unsigned Mask = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue();
+ unsigned Mask = LHS.getConstantOperandVal(1);
 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
 uint64_t RHSV = RHSC->getZExtValue();
 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
@@ -4823,9 +4823,8 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 isa<ConstantSDNode>(RHS) &&
 cast<ConstantSDNode>(RHS)->getZExtValue() == 0x80000000U &&
 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() < 31) {
- unsigned ShiftAmt =
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() + 1;
+ LHS.getConstantOperandVal(1) < 31) {
+ unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
 SDValue Shift =
 DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, MVT::i32),
 LHS.getOperand(0),
@@ -6112,7 +6111,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
 EVT VT = Op.getValueType();
 SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 if (Depth) {
 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
@@ -6135,7 +6134,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
 EVT VT = Op.getValueType();
 SDLoc dl(Op); // FIXME probably not meaningful
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 Register FrameReg = ARI.getFrameRegister(MF);
 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
 while (Depth--)
@@ -8221,7 +8220,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
 // Update the minimum and maximum lane number seen.
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ unsigned EltNo = V.getConstantOperandVal(1);
 Source->MinElt = std::min(Source->MinElt, EltNo);
 Source->MaxElt = std::max(Source->MaxElt, EltNo);
 }
@@ -9034,7 +9033,7 @@ static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
 SDValue Conv =
 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
- unsigned Lane = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Lane = Op.getConstantOperandVal(2);
 unsigned LaneWidth =
 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
@@ -9097,7 +9096,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
 SDValue Conv =
 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
- unsigned Lane = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Lane = Op.getConstantOperandVal(1);
 unsigned LaneWidth =
 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
@@ -10682,7 +10681,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
 SelectionDAG &DAG) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(0);
 unsigned Opc = 0;
 if (IntNo == Intrinsic::arm_smlald)
 Opc = ARMISD::SMLALD;
@@ -14842,14 +14841,14 @@ static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
 assert(N->getOpcode() == ARMISD::BFI);
 SDValue From = N->getOperand(1);
- ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+ ToMask = ~N->getConstantOperandAPInt(2);
 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
 // If the Base came from a SHR #C, we can deduce that it is really testing bit
 // #C in the base of the SHR.
 if (From->getOpcode() == ISD::SRL &&
 isa<ConstantSDNode>(From->getOperand(1))) {
- APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+ APInt Shift = From->getConstantOperandAPInt(1);
 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
 FromMask <<= Shift.getLimitedValue(31);
 From = From->getOperand(0);
@@ -14908,7 +14907,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
 if (!N11C)
 return SDValue();
- unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned InvMask = N->getConstantOperandVal(2);
 unsigned LSB = llvm::countr_zero(~InvMask);
 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
 assert(Width <
@@ -15448,8 +15447,7 @@ static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,
 EVT VT = N->getValueType(0);
 SDValue Op0 = N->getOperand(0);
 SDValue Op1 = N->getOperand(1);
- ARMCC::CondCodes Cond =
- (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
 SDLoc dl(N);
 // vcmp X, 0, cc -> vcmpz X, cc
@@ -15794,7 +15792,7 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
 unsigned NewOpc = 0;
 unsigned NumVecs = 0;
 if (Target.isIntrinsic) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
 switch (IntNo) {
 default:
 llvm_unreachable("unexpected intrinsic for Neon base update");
@@ -16254,12 +16252,10 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
 // For the stores, where there are multiple intrinsics we only actually want
 // to post-inc the last of the them.
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- if (IntNo == Intrinsic::arm_mve_vst2q &&
- cast<ConstantSDNode>(N->getOperand(5))->getZExtValue() != 1)
+ unsigned IntNo = N->getConstantOperandVal(1);
+ if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
 return SDValue();
- if (IntNo == Intrinsic::arm_mve_vst4q &&
- cast<ConstantSDNode>(N->getOperand(7))->getZExtValue() != 3)
+ if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
 return SDValue();
 // Search for a use of the address operand that is an increment.
@@ -16381,7 +16377,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
 return false;
 unsigned NumVecs = 0;
 unsigned NewOpc = 0;
- unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ unsigned IntNo = VLD->getConstantOperandVal(1);
 if (IntNo == Intrinsic::arm_neon_vld2lane) {
 NumVecs = 2;
 NewOpc = ARMISD::VLD2DUP;
@@ -16397,8 +16393,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
 // numbers match the load.
- unsigned VLDLaneNo =
- cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
 UI != UE; ++UI) {
 // Ignore uses of the chain result.
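
The ParseBFI hunk above uses the APInt flavor of the same accessor, getConstantOperandAPInt, which is the safer choice for mask manipulation: it keeps the value at the operand's native bit width, so complement and popcount act on exactly the operand's bits rather than on a zero-extended uint64_t. A hedged sketch (N is assumed to be a BFI-like node whose operand 2 is a constant inverse mask, as in ParseBFI):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static unsigned countWrittenBits(const SDNode *N) {
  // Complement at the operand's own width; for a 32-bit mask this flips
  // exactly 32 bits, where ~getZExtValue() would have flipped 64.
  APInt ToMask = ~N->getConstantOperandAPInt(2);
  return ToMask.popcount(); // number of lanes the BFI actually writes
}
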
@@ -16406,7 +16401,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
 continue;
 SDNode *User = *UI;
 if (User->getOpcode() != ARMISD::VDUPLANE ||
- VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ VLDLaneNo != User->getConstantOperandVal(1))
 return false;
 }
@@ -16479,7 +16474,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
 unsigned EltSize = Op.getScalarValueSizeInBits();
 // The canonical VMOV for a zero vector uses a 32-bit element size.
- unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Imm = Op.getConstantOperandVal(0);
 unsigned EltBits;
 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
 EltSize = 8;
@@ -17479,7 +17474,7 @@ static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
 DAGCombinerInfo &DCI) const {
 SelectionDAG &DAG = DCI.DAG;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(0);
 switch (IntNo) {
 default:
 // Don't do anything for most intrinsics.
@@ -17669,7 +17664,7 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
 case Intrinsic::arm_mve_addv: {
 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
 // which allow PerformADDVecReduce to turn it into VADDLV when possible.
- bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ bool Unsigned = N->getConstantOperandVal(2);
 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
 }
@@ -17678,7 +17673,7 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
 case Intrinsic::arm_mve_addlv:
 case Intrinsic::arm_mve_addlv_predicated: {
 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
 // which recombines the two outputs into an i64
- bool Unsigned = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ bool Unsigned = N->getConstantOperandVal(2);
 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
@@ -18193,7 +18188,7 @@ static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
 }
 case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
+ unsigned IntOp = N.getConstantOperandVal(1);
 if (IntOp != Intrinsic::test_start_loop_iterations &&
 IntOp != Intrinsic::loop_decrement_reg)
 return SDValue();
@@ -18271,7 +18266,7 @@ static SDValue PerformHWLoopCombine(SDNode *N,
 SDLoc dl(Int);
 SelectionDAG &DAG = DCI.DAG;
 SDValue Elements = Int.getOperand(2);
- unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+ unsigned IntOp = Int->getConstantOperandVal(1);
 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
 && "expected single br user");
 SDNode *Br = *N->use_begin();
@@ -18305,8 +18300,8 @@ static SDValue PerformHWLoopCombine(SDNode *N,
 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
 return Res;
 } else {
- SDValue Size = DAG.getTargetConstant(
- cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
+ SDValue Size =
+ DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
 DAG.getVTList(MVT::i32, MVT::Other), Args);
@@ -19051,7 +19046,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
 }
 case ISD::INTRINSIC_VOID:
 case ISD::INTRINSIC_W_CHAIN:
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ switch (N->getConstantOperandVal(1)) {
 case Intrinsic::arm_neon_vld1:
 case Intrinsic::arm_neon_vld1x2:
 case Intrinsic::arm_neon_vld1x3:
diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 196122e45ab8..e67a1e2ed509 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -335,7 +335,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) {
 return false;
 }
- int CST = (int)cast<ConstantSDNode>(BasePtr.getOperand(1))->getZExtValue();
+ int CST = (int)BasePtr.getConstantOperandVal(1);
 SDValue Chain = ST->getChain();
 EVT VT = ST->getValue().getValueType();
 SDLoc DL(N);
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index cd1dcfaea0eb..d36bfb188ed3 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -298,8 +298,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
 SDValue SrcHi =
 DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16,
 Op.getOperand(0), DAG.getConstant(1, dl, MVT::i16));
- uint64_t ShiftAmount =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ uint64_t ShiftAmount = N->getConstantOperandVal(1);
 if (ShiftAmount == 16) {
 // Special case these two operations because they appear to be used by the
 // generic codegen parts to lower 32-bit numbers.
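
Regarding the AVR special case above: when a 32-bit value is handled as two 16-bit halves, a shift by exactly 16 degenerates into a whole-word move, so no per-bit shift loop is needed. A worked miniature of the arithmetic, in plain C++ and independent of the backend:

#include <cstdint>

// x << 16 moves the low 16-bit word into the high word and zeroes the low
// word; x >> 16 is the mirror image. No per-bit shifting remains.
uint32_t shl16(uint32_t x) { return x << 16; } // {hi, lo} -> {lo, 0}
uint32_t shr16(uint32_t x) { return x >> 16; } // {hi, lo} -> {0, hi}
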
@@ -367,7 +366,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
 }
 }
- uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ uint64_t ShiftAmount = N->getConstantOperandVal(1);
 SDValue Victim = N->getOperand(0);
 switch (Op.getOpcode()) {
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 909c7c005735..d8139958e9fc 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -193,7 +193,7 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
 default:
 break;
 case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
 switch (IntNo) {
 case Intrinsic::bpf_load_byte:
 case Intrinsic::bpf_load_half:
@@ -469,7 +469,7 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node,
 if (BaseV.getOpcode() != ISD::INTRINSIC_W_CHAIN)
 return;
- unsigned IntNo = cast<ConstantSDNode>(BaseV->getOperand(1))->getZExtValue();
+ unsigned IntNo = BaseV->getConstantOperandVal(1);
 uint64_t MaskV = MaskN->getZExtValue();
 if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) ||
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index e3b4a2dc048a..90f70b83a02d 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -1219,7 +1219,7 @@ SDValue CSKYTargetLowering::LowerFRAMEADDR(SDValue Op,
 EVT VT = Op.getValueType();
 SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 Register FrameReg = RI.getFrameRegister(MF);
 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
 while (Depth--)
@@ -1240,7 +1240,7 @@ SDValue CSKYTargetLowering::LowerRETURNADDR(SDValue Op,
 EVT VT = Op.getValueType();
 SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 if (Depth) {
 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index f930015026a5..eb5c59672224 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -192,7 +192,7 @@ MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
 return nullptr;
 SDLoc dl(IntN);
- unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ unsigned IntNo = IntN->getConstantOperandVal(1);
 static std::map<unsigned,unsigned> LoadPciMap = {
 { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
@@ -284,18 +284,18 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
 // can provide an address of an unsigned variable to store the result of
 // a sign-extending intrinsic into (or the other way around).
 ISD::LoadExtType IntExt;
- switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
- case Intrinsic::hexagon_circ_ldub:
- case Intrinsic::hexagon_circ_lduh:
- IntExt = ISD::ZEXTLOAD;
- break;
- case Intrinsic::hexagon_circ_ldw:
- case Intrinsic::hexagon_circ_ldd:
- IntExt = ISD::NON_EXTLOAD;
- break;
- default:
- IntExt = ISD::SEXTLOAD;
- break;
+ switch (C->getConstantOperandVal(1)) {
+ case Intrinsic::hexagon_circ_ldub:
+ case Intrinsic::hexagon_circ_lduh:
+ IntExt = ISD::ZEXTLOAD;
+ break;
+ case Intrinsic::hexagon_circ_ldw:
+ case Intrinsic::hexagon_circ_ldd:
+ IntExt = ISD::NON_EXTLOAD;
+ break;
+ default:
+ IntExt = ISD::SEXTLOAD;
+ break;
 }
 if (N->getExtensionType() != IntExt)
 return false;
@@ -325,7 +325,7 @@ bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
 return false;
 const SDLoc &dl(IntN);
- unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ unsigned IntNo = IntN->getConstantOperandVal(1);
 static const std::map<unsigned, unsigned> LoadBrevMap = {
 { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
@@ -366,7 +366,7 @@ bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) {
 return false;
 SDLoc DL(IntN);
- unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ unsigned IntNo = IntN->getConstantOperandVal(1);
 SmallVector<SDValue, 7> Ops;
 static std::map<unsigned,unsigned> LoadNPcMap = {
@@ -641,7 +641,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
 if (SelectNewCircIntrinsic(N))
 return;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
 if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
 IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
 IntNo == Intrinsic::hexagon_V6_vgathermh ||
@@ -665,7 +665,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
 }
 void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
 unsigned Bits;
 switch (IID) {
 case Intrinsic::hexagon_S2_vsplatrb:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index e08566718d7c..fb156f2583e8 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -2895,7 +2895,7 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
 unsigned Opcode;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
 switch (IntNo) {
 default:
 llvm_unreachable("Unexpected HVX gather intrinsic.");
@@ -2934,7 +2934,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
 unsigned Opcode;
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned IntNo = N->getConstantOperandVal(1);
 switch (IntNo) {
 default:
 llvm_unreachable("Unexpected HVX gather intrinsic.");
@@ -2963,7 +2963,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
 }
 void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IID = N->getConstantOperandVal(0);
 SDNode *Result;
 switch (IID) {
 case Intrinsic::hexagon_V6_vaddcarry: {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index a7d452e7227d..51138091f4a5 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -669,8 +669,7 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
 --NumOps; // Ignore the flag operand.
 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
- const InlineAsm::Flag Flags(
- cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue());
+ const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
 unsigned NumVals = Flags.getNumOperandRegisters();
 ++i; // Skip the ID value.
@@ -729,7 +728,7 @@ SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
 SDValue
 HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const {
 SDValue Chain = Op.getOperand(0);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(1);
 // Lower the hexagon_prefetch builtin to DCFETCH, as above.
 if (IntNo == Intrinsic::hexagon_prefetch) {
 SDValue Addr = Op.getOperand(2);
@@ -1176,7 +1175,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
 EVT VT = Op.getValueType();
 SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 if (Depth) {
 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
@@ -1198,7 +1197,7 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
 EVT VT = Op.getValueType();
 SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 SDValue FrameAddr =
 DAG.getCopyFromReg(DAG.getEntryNode(), dl, HRI.getFrameRegister(), VT);
 while (Depth--)
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index db416a500f59..665e2d79c83d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -2127,7 +2127,7 @@ HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
 SDValue
 HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
 const SDLoc &dl(Op);
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned IntNo = Op.getConstantOperandVal(0);
 SmallVector<SDValue> Ops(Op->ops().begin(), Op->ops().end());
 auto Swap = [&](SDValue P) {
@@ -2922,7 +2922,7 @@ SDValue
 HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
- unsigned Opc = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Opc = Op.getConstantOperandVal(2);
 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
 }
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index cbb5c2b998e2..17d7ffb586f4 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1057,7 +1057,7 @@ SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op,
 EVT VT = Op.getValueType();
 SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 if (Depth) {
 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
 const unsigned Offset = -4;
@@ -1080,7 +1080,7 @@ SDValue LanaiTargetLowering::LowerFRAMEADDR(SDValue Op,
 EVT VT = Op.getValueType();
 SDLoc DL(Op);
 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Lanai::FP, VT);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
 while (Depth--) {
 const unsigned Offset = -8;
 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 276374afee38..66a37fce5dda 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -85,7 +85,7 @@ class LoongArchAsmParser : public MCTargetAsmParser {
 // "emitLoadAddress*" functions.
 void emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
 const MCExpr *Symbol, SmallVectorImpl<Inst> &Insts,
- SMLoc IDLoc, MCStreamer &Out);
+ SMLoc IDLoc, MCStreamer &Out, bool RelaxHint = false);
 // Helper to emit pseudo instruction "la.abs $rd, sym".
 void emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
@@ -748,12 +748,14 @@ bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info,
 void LoongArchAsmParser::emitLAInstSeq(MCRegister DestReg, MCRegister TmpReg,
 const MCExpr *Symbol,
 SmallVectorImpl<Inst> &Insts,
- SMLoc IDLoc, MCStreamer &Out) {
+ SMLoc IDLoc, MCStreamer &Out,
+ bool RelaxHint) {
 MCContext &Ctx = getContext();
 for (LoongArchAsmParser::Inst &Inst : Insts) {
 unsigned Opc = Inst.Opc;
 LoongArchMCExpr::VariantKind VK = Inst.VK;
- const LoongArchMCExpr *LE = LoongArchMCExpr::create(Symbol, VK, Ctx);
+ const LoongArchMCExpr *LE =
+ LoongArchMCExpr::create(Symbol, VK, Ctx, RelaxHint);
 switch (Opc) {
 default:
 llvm_unreachable("unexpected opcode");
@@ -854,7 +856,7 @@ void LoongArchAsmParser::emitLoadAddressPcrel(MCInst &Inst, SMLoc IDLoc,
 Insts.push_back(
 LoongArchAsmParser::Inst(ADDI, LoongArchMCExpr::VK_LoongArch_PCALA_LO12));
- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true);
 }
 void LoongArchAsmParser::emitLoadAddressPcrelLarge(MCInst &Inst, SMLoc IDLoc,
@@ -900,7 +902,7 @@ void LoongArchAsmParser::emitLoadAddressGot(MCInst &Inst, SMLoc IDLoc,
 Insts.push_back(
 LoongArchAsmParser::Inst(LD, LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12));
- emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out);
+ emitLAInstSeq(DestReg, DestReg, Symbol, Insts, IDLoc, Out, true);
 }
 void LoongArchAsmParser::emitLoadAddressGotLarge(MCInst &Inst, SMLoc IDLoc,
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4794a131edae..e14bbadf9ed2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -286,7 +286,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 setOperationAction(ISD::UNDEF, VT, Legal);
 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 setOperationAction(ISD::SETCC, VT, Legal);
@@ -406,6 +408,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
 return lowerWRITE_REGISTER(Op, DAG);
 case ISD::INSERT_VECTOR_ELT:
 return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
 case ISD::BUILD_VECTOR:
 return lowerBUILD_VECTOR(Op, DAG);
 case ISD::VECTOR_SHUFFLE:
@@ -514,6 +516,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, } SDValue +LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + EVT VecTy = Op->getOperand(0)->getValueType(0); + SDValue Idx = Op->getOperand(1); + EVT EltTy = VecTy.getVectorElementType(); + unsigned NumElts = VecTy.getVectorNumElements(); + + if (isa<ConstantSDNode>(Idx) && + (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || + EltTy == MVT::f64 || + cast<ConstantSDNode>(Idx)->getZExtValue() < NumElts / 2)) + return Op; + + return SDValue(); +} + +SDValue LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { if (isa<ConstantSDNode>(Op->getOperand(2))) @@ -569,7 +588,7 @@ SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); int GRLenInBytes = Subtarget.getGRLen() / 8; while (Depth--) { @@ -588,7 +607,7 @@ SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, return SDValue(); // Currently only support lowering return address for current frame. - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) { + if (Op.getConstantOperandVal(0) != 0) { DAG.getContext()->emitError( "return address can only be determined for the current frame"); return SDValue(); @@ -1244,7 +1263,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG); case Intrinsic::loongarch_csrrd_w: case Intrinsic::loongarch_csrrd_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(2); return !isUInt<14>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other}, @@ -1252,7 +1271,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::loongarch_csrwr_w: case Intrinsic::loongarch_csrwr_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(3); return !isUInt<14>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other}, @@ -1261,7 +1280,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, } case Intrinsic::loongarch_csrxchg_w: case Intrinsic::loongarch_csrxchg_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(4); return !isUInt<14>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other}, @@ -1287,7 +1306,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, {Chain, Op.getOperand(2)}); } case Intrinsic::loongarch_lddir_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(3); return !isUInt<8>(Imm) ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : Op; @@ -1295,7 +1314,7 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::loongarch_movfcsr2gr: { if (!Subtarget.hasBasicF()) return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG); - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(2); return !isUInt<2>(Imm) ? 
emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, @@ -1441,7 +1460,7 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, ASRT_LE_GT_CASE(asrtgt_d) #undef ASRT_LE_GT_CASE case Intrinsic::loongarch_ldpte_d: { - unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Imm = Op.getConstantOperandVal(3); return !Subtarget.is64Bit() ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) @@ -1454,53 +1473,53 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_b: return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<5>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_b: return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<4>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_h: return (!isShiftedInt<8, 1>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<4>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 2", DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_h: return (!isShiftedInt<8, 1>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<3>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 2", DAG) : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_w: return (!isShiftedInt<8, 2>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<3>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 4", DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_w: return (!isShiftedInt<8, 2>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<2>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 4", DAG) : SDValue(); case Intrinsic::loongarch_lasx_xvstelm_d: return (!isShiftedInt<8, 3>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<2>(Op.getConstantOperandVal(5))) ? emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 8", DAG) : SDValue(); case Intrinsic::loongarch_lsx_vstelm_d: return (!isShiftedInt<8, 3>( cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) || - !isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue())) + !isUInt<1>(Op.getConstantOperandVal(5))) ? 
emitIntrinsicErrorMessage( Op, "argument out of range or not a multiple of 8", DAG) : SDValue(); @@ -1673,7 +1692,7 @@ replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp) { const StringRef ErrorMsgOOR = "argument out of range"; - unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue(); + unsigned Imm = Node->getConstantOperandVal(2); if (!isUInt<N>(Imm)) { emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR, /*WithChain=*/false); @@ -1976,7 +1995,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( break; } case Intrinsic::loongarch_csrwr_w: { - unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); + unsigned Imm = N->getConstantOperandVal(3); if (!isUInt<14>(Imm)) { emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; @@ -1991,7 +2010,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( break; } case Intrinsic::loongarch_csrxchg_w: { - unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); + unsigned Imm = N->getConstantOperandVal(4); if (!isUInt<14>(Imm)) { emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 2d73a7394946..6f8878f9ccd5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -279,6 +279,7 @@ private: SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index ec6983d0f487..b3c11bc5423d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1571,11 +1571,11 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; -// XVREPL128VEI_{W/D} +// XVREPLVE0_{W/D} def : Pat<(lasxsplatf32 FPR32:$fj), - (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; + (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>; def : Pat<(lasxsplatf64 FPR64:$fj), - (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; + (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>; // Loads/Stores foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in { @@ -1590,42 +1590,18 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)), (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>; def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)), (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>; -def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)), - (VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>; -def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)), - (VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>; -def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)), - (f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>; -def : Pat<(f64 
(vector_extract v4f64:$xj, uimm1:$imm)), - (f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>; - -// Vector extraction with variable index. -def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)), - (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj, - i64:$rk), - sub_32)), - GPR), (i64 24))>; -def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)), - (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj, - i64:$rk), - sub_32)), - GPR), (i64 16))>; -def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)), - (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk), - sub_32)), - GPR)>; -def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)), - (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk), - sub_64)), - GPR)>; -def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)), - (f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>; -def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)), - (f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>; +def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), + (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; +def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), + (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; +def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)), + (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>; +def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), + (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; // vselect -def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd, - (v32i8 (SplatPat_uimm8 uimm8:$imm)))), +def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)), + LASX256:$xj)), (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>; foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)), diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index e468176885d7..5569c2cd15b5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1731,8 +1731,8 @@ def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; // vselect -def : Pat<(v16i8 (vselect LSX128:$vj, LSX128:$vd, - (v16i8 (SplatPat_uimm8 uimm8:$imm)))), +def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), + LSX128:$vj)), (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 45169becca37..d2ea062dc09a 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/EndianStream.h" @@ -120,12 +121,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { assert(MO.isExpr() && "getExprOpValue expects only expressions"); + bool RelaxCandidate = false; + bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax); const MCExpr *Expr = MO.getExpr(); 
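
// Illustrative sketch (not part of the patch itself): how the new relax hint
// travels from the assembler into relocations. LoongArchMCExpr, FeatureRelax
// and fixup_loongarch_relax are names this change introduces; the helper
// below is hypothetical and merely restates the logic this hunk appends to
// the end of getExprOpValue().
static void maybeEmitRelaxFixup(MCContext &Ctx, const MCSubtargetInfo &STI,
                                const LoongArchMCExpr *LE, const MCInst &MI,
                                SmallVectorImpl<MCFixup> &Fixups) {
  // la.pcrel/la.got expansions are created with Hint = true by the parser.
  if (!STI.hasFeature(LoongArch::FeatureRelax) || !LE->getRelaxHint())
    return; // not a relax candidate: only the ordinary fixup is emitted
  // Pair the ordinary relocation with a zero-value R_LARCH_RELAX fixup so
  // the linker knows this instruction sequence may be relaxed.
  const MCExpr *Dummy = MCConstantExpr::create(0, Ctx);
  Fixups.push_back(MCFixup::create(
      0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc()));
}
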
MCExpr::ExprKind Kind = Expr->getKind(); LoongArch::Fixups FixupKind = LoongArch::fixup_loongarch_invalid; if (Kind == MCExpr::Target) { const LoongArchMCExpr *LAExpr = cast<LoongArchMCExpr>(Expr); + RelaxCandidate = LAExpr->getRelaxHint(); switch (LAExpr->getKind()) { case LoongArchMCExpr::VK_LoongArch_None: case LoongArchMCExpr::VK_LoongArch_Invalid: @@ -270,6 +274,15 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, Fixups.push_back( MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc())); + + // Emit an R_LARCH_RELAX if linker relaxation is enabled and LAExpr has relax + // hint. + if (EnableRelax && RelaxCandidate) { + const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx); + Fixups.push_back(MCFixup::create( + 0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc())); + } + return 0; } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp index 993111552a31..82c992b1cc8c 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp @@ -25,9 +25,10 @@ using namespace llvm; #define DEBUG_TYPE "loongarch-mcexpr" -const LoongArchMCExpr * -LoongArchMCExpr::create(const MCExpr *Expr, VariantKind Kind, MCContext &Ctx) { - return new (Ctx) LoongArchMCExpr(Expr, Kind); +const LoongArchMCExpr *LoongArchMCExpr::create(const MCExpr *Expr, + VariantKind Kind, MCContext &Ctx, + bool Hint) { + return new (Ctx) LoongArchMCExpr(Expr, Kind, Hint); } void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h index 0945cf82db86..93251f824103 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h @@ -67,16 +67,18 @@ public: private: const MCExpr *Expr; const VariantKind Kind; + const bool RelaxHint; - explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind) - : Expr(Expr), Kind(Kind) {} + explicit LoongArchMCExpr(const MCExpr *Expr, VariantKind Kind, bool Hint) + : Expr(Expr), Kind(Kind), RelaxHint(Hint) {} public: static const LoongArchMCExpr *create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx); + MCContext &Ctx, bool Hint = false); VariantKind getKind() const { return Kind; } const MCExpr *getSubExpr() const { return Expr; } + bool getRelaxHint() const { return RelaxHint; } void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp index 7bd382107773..7fcc65beaa65 100644 --- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp +++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp @@ -161,6 +161,16 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV16rf), MVT::i32, MVT::i16); + case M68k::MOVSXd16q8: + return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV8dq), MVT::i16, + MVT::i8); + case M68k::MOVSXd32q8: + return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV8dq), MVT::i32, + MVT::i8); + case M68k::MOVSXd32q16: + return TII->ExpandMOVSZX_RM(MIB, true, TII->get(M68k::MOV16dq), MVT::i32, + MVT::i16); + case M68k::MOVZXd16q8: return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV8dq), MVT::i16, MVT::i8); diff --git 
a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index 0830cc7feb22..c4d7a0dec7f3 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -94,11 +94,10 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM, setOperationAction(OP, MVT::i16, Expand); } - // FIXME It would be better to use a custom lowering for (auto OP : {ISD::SMULO, ISD::UMULO}) { - setOperationAction(OP, MVT::i8, Expand); - setOperationAction(OP, MVT::i16, Expand); - setOperationAction(OP, MVT::i32, Expand); + setOperationAction(OP, MVT::i8, Custom); + setOperationAction(OP, MVT::i16, Custom); + setOperationAction(OP, MVT::i32, Custom); } for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}) @@ -1533,46 +1532,119 @@ bool M68kTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, return VT.bitsLE(MVT::i32) || Subtarget.atLeastM68020(); } -SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { - // Lower the "add/sub/mul with overflow" instruction into a regular ins plus - // a "setcc" instruction that checks the overflow flag. The "brcond" lowering - // looks for this combo and may remove the "setcc" instruction if the "setcc" - // has only one use. +static bool isOverflowArithmetic(unsigned Opcode) { + switch (Opcode) { + case ISD::UADDO: + case ISD::SADDO: + case ISD::USUBO: + case ISD::SSUBO: + case ISD::UMULO: + case ISD::SMULO: + return true; + default: + return false; + } +} + +static void lowerOverflowArithmetic(SDValue Op, SelectionDAG &DAG, + SDValue &Result, SDValue &CCR, + unsigned &CC) { SDNode *N = Op.getNode(); + EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - unsigned BaseOp = 0; - unsigned Cond = 0; SDLoc DL(Op); + + unsigned TruncOp = 0; + auto PromoteMULO = [&](unsigned ExtOp) { + // We don't have 8-bit multiplications, so promote i8 version of U/SMULO + // to i16. + // Ideally this should be done by legalizer but sadly there is no promotion + // rule for U/SMULO at this moment. + if (VT == MVT::i8) { + LHS = DAG.getNode(ExtOp, DL, MVT::i16, LHS); + RHS = DAG.getNode(ExtOp, DL, MVT::i16, RHS); + VT = MVT::i16; + TruncOp = ISD::TRUNCATE; + } + }; + + bool NoOverflow = false; + unsigned BaseOp = 0; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: BaseOp = M68kISD::ADD; - Cond = M68k::COND_VS; + CC = M68k::COND_VS; break; case ISD::UADDO: BaseOp = M68kISD::ADD; - Cond = M68k::COND_CS; + CC = M68k::COND_CS; break; case ISD::SSUBO: BaseOp = M68kISD::SUB; - Cond = M68k::COND_VS; + CC = M68k::COND_VS; break; case ISD::USUBO: BaseOp = M68kISD::SUB; - Cond = M68k::COND_CS; + CC = M68k::COND_CS; + break; + case ISD::UMULO: + PromoteMULO(ISD::ZERO_EXTEND); + NoOverflow = VT != MVT::i32; + BaseOp = NoOverflow ? ISD::MUL : M68kISD::UMUL; + CC = M68k::COND_VS; + break; + case ISD::SMULO: + PromoteMULO(ISD::SIGN_EXTEND); + NoOverflow = VT != MVT::i32; + BaseOp = NoOverflow ? ISD::MUL : M68kISD::SMUL; + CC = M68k::COND_VS; break; } - // Also sets CCR. - SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i8); + SDVTList VTs; + if (NoOverflow) + VTs = DAG.getVTList(VT); + else + // Also sets CCR. 
+ VTs = DAG.getVTList(VT, MVT::i8); + SDValue Arith = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); - SDValue SetCC = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1), - DAG.getConstant(Cond, DL, MVT::i8), - SDValue(Arith.getNode(), 1)); + Result = Arith.getValue(0); + if (TruncOp) + // Right now the only place to truncate is from i16 to i8. + Result = DAG.getNode(TruncOp, DL, MVT::i8, Arith); - return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Arith, SetCC); + if (NoOverflow) + CCR = DAG.getConstant(0, DL, N->getValueType(1)); + else + CCR = Arith.getValue(1); +} + +SDValue M68kTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDLoc DL(Op); + + // Lower the "add/sub/mul with overflow" instruction into a regular ins plus + // a "setcc" instruction that checks the overflow flag. + SDValue Result, CCR; + unsigned CC; + lowerOverflowArithmetic(Op, DAG, Result, CCR, CC); + + SDValue Overflow; + if (isa<ConstantSDNode>(CCR)) { + // It's likely a result of operations that will not overflow + // hence no setcc is needed. + Overflow = CCR; + } else { + // Generate a M68kISD::SETCC. + Overflow = DAG.getNode(M68kISD::SETCC, DL, N->getValueType(1), + DAG.getConstant(CC, DL, MVT::i8), CCR); + } + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Overflow); } /// Create a BTST (Bit Test) node - Test bit \p BitNo in \p Src and set @@ -2206,8 +2278,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { isNullConstant(Cond.getOperand(1).getOperand(0))) { SDValue Cmp = Cond.getOperand(1); - unsigned CondCode = - cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); + unsigned CondCode = Cond.getConstantOperandVal(0); if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == M68k::COND_EQ || CondCode == M68k::COND_NE)) { @@ -2269,55 +2340,12 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; addTest = false; } - } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || - CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned MxOpcode; - unsigned MxCond; - SDVTList VTs; - switch (CondOpcode) { - case ISD::UADDO: - MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_CS; - break; - case ISD::SADDO: - MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_VS; - break; - case ISD::USUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_CS; - break; - case ISD::SSUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_VS; - break; - case ISD::UMULO: - MxOpcode = M68kISD::UMUL; - MxCond = M68k::COND_VS; - break; - case ISD::SMULO: - MxOpcode = M68kISD::SMUL; - MxCond = M68k::COND_VS; - break; - default: - llvm_unreachable("unexpected overflowing operator"); - } - if (CondOpcode == ISD::UMULO) - VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i32); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - - SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = MxOp.getValue(2); - else - Cond = MxOp.getValue(1); - - CC = DAG.getConstant(MxCond, DL, MVT::i8); + } else if (isOverflowArithmetic(CondOpcode)) { + // Result is unused here. 
+ SDValue Result; + unsigned CCode; + lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode); + CC = DAG.getConstant(CCode, DL, MVT::i8); addTest = false; } @@ -2377,6 +2405,17 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // Simple optimization when Cond is a constant to avoid generating + // M68kISD::CMOV if possible. + // TODO: Generalize this to use SelectionDAG::computeKnownBits. + if (auto *Const = dyn_cast<ConstantSDNode>(Cond.getNode())) { + const APInt &C = Const->getAPIntValue(); + if (C.countr_zero() >= 5) + return Op2; + else if (C.countr_one() >= 5) + return Op1; + } + // M68kISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); @@ -2466,61 +2505,15 @@ SDValue M68kTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { } } CondOpcode = Cond.getOpcode(); - if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned MxOpcode; - unsigned MxCond; - SDVTList VTs; - // Keep this in sync with LowerXALUO, otherwise we might create redundant - // instructions that can't be removed afterwards (i.e. M68kISD::ADD and - // M68kISD::INC). - switch (CondOpcode) { - case ISD::UADDO: - MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_CS; - break; - case ISD::SADDO: - MxOpcode = M68kISD::ADD; - MxCond = M68k::COND_VS; - break; - case ISD::USUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_CS; - break; - case ISD::SSUBO: - MxOpcode = M68kISD::SUB; - MxCond = M68k::COND_VS; - break; - case ISD::UMULO: - MxOpcode = M68kISD::UMUL; - MxCond = M68k::COND_VS; - break; - case ISD::SMULO: - MxOpcode = M68kISD::SMUL; - MxCond = M68k::COND_VS; - break; - default: - llvm_unreachable("unexpected overflowing operator"); - } + if (isOverflowArithmetic(CondOpcode)) { + SDValue Result; + unsigned CCode; + lowerOverflowArithmetic(Cond, DAG, Result, Cond, CCode); if (Inverted) - MxCond = M68k::GetOppositeBranchCondition((M68k::CondCode)MxCond); + CCode = M68k::GetOppositeBranchCondition((M68k::CondCode)CCode); + CC = DAG.getConstant(CCode, DL, MVT::i8); - if (CondOpcode == ISD::UMULO) - VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), MVT::i8); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i8); - - SDValue MxOp = DAG.getNode(MxOpcode, DL, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = MxOp.getValue(2); - else - Cond = MxOp.getValue(1); - - CC = DAG.getConstant(MxCond, DL, MVT::i8); AddTest = false; } else { unsigned CondOpc; @@ -3394,7 +3387,7 @@ SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDNode *Node = Op.getNode(); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Align = Op.getConstantOperandVal(2); EVT VT = Node->getValueType(0); // Chain the dynamic stack allocation so that it doesn't modify the stack diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h index 02427a4e749e..d00907775f92 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.h +++ b/llvm/lib/Target/M68k/M68kISelLowering.h @@ -194,6 +194,15 @@ private: unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override { + // In many cases, `GA` doesn't give the correct offset to 
fold. It's + // hard to know if the real offset actually fits into the displacement + // of the prospective addressing mode. + // Thus, we disable offset folding altogether and leave that to ISel + // patterns. + return false; + } + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; /// Emit a load of return address if tail call diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td index 15d2049f62cb..3532e56e7417 100644 --- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td +++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td @@ -590,8 +590,9 @@ class MxDiMuOp_DD<string MN, bits<4> CMD, bit SIGNED = false, } // $dreg <- $dreg op $dreg -class MxDiMuOp_DD_Long<string MN, bits<10> CMD, bit SIGNED = false> - : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst", []> { +class MxDiMuOp_DD_Long<string MN, SDNode NODE, bits<10> CMD, bit SIGNED = false> + : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst", + [(set i32:$dst, CCR, (NODE i32:$src, i32:$opd))]> { let Inst = (ascend (descend CMD, /*MODE*/0b000, /*REGISTER*/(operand "$opd", 3)), @@ -622,11 +623,9 @@ class MxDiMuOp_DI<string MN, bits<4> CMD, bit SIGNED = false, } // let Constraints } // Defs = [CCR] -multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> { - let isCommutable = isComm in { - def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>; - def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>; - } +multiclass MxDiMuOp<string MN, bits<4> CMD> { + def "S"#NAME#"d32d16" : MxDiMuOp_DD<MN#"s", CMD, /*SIGNED*/true, MxDRD32, MxDRD16>; + def "U"#NAME#"d32d16" : MxDiMuOp_DD<MN#"u", CMD, /*SIGNED*/false, MxDRD32, MxDRD16>; def "S"#NAME#"d32i16" : MxDiMuOp_DI<MN#"s", CMD, /*SIGNED*/true, MxDRD32, Mxi16imm>; def "U"#NAME#"d32i16" : MxDiMuOp_DI<MN#"u", CMD, /*SIGNED*/false, MxDRD32, Mxi16imm>; @@ -634,8 +633,8 @@ multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> { defm DIV : MxDiMuOp<"div", 0x8>; -def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", 0x131, /*SIGNED*/true>; -def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", 0x131, /*SIGNED*/false>; +def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", sdiv, 0x131, /*SIGNED*/true>; +def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", udiv, 0x131, /*SIGNED*/false>; // This is used to cast immediates to 16-bits for operations which don't // support smaller immediate sizes.
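
// Condensed, hypothetical restatement of the new M68k overflow lowering seen
// in M68kISelLowering.cpp above: lowerOverflowArithmetic() promotes i8
// U/SMULO to i16 because there is no 8-bit multiply instruction, truncates
// the product back, and when it concludes no overflow can be produced it
// hands back a constant-zero CCR value so that LowerXALUO can skip emitting
// the M68kISD::SETCC. Only the unsigned i8 path is sketched here.
static void lowerI8UMulO(SDValue Op, SelectionDAG &DAG, SDValue &Result,
                         SDValue &CCR) {
  SDLoc DL(Op);
  // Promote both operands to i16, mirroring PromoteMULO(ISD::ZERO_EXTEND).
  SDValue LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i16, Op.getOperand(0));
  SDValue RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i16, Op.getOperand(1));
  // Plain multiply; two zero-extended bytes cannot overflow 16 bits.
  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i16, LHS, RHS);
  Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Mul);
  // Overflow flag folded to a constant; LowerXALUO forwards it unchanged.
  CCR = DAG.getConstant(0, DL, Op.getNode()->getValueType(1));
}
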
@@ -685,60 +684,53 @@ def : Pat<(urem i16:$dst, i16:$opd), (LSR32di (LSR32di (UDIVd32d16 (MOVZXd32d16 $dst), $opd), 8), 8), MxSubRegIndex16Lo)>; - -// RR i32 -def : Pat<(sdiv i32:$dst, i32:$opd), (SDIVd32d32 $dst, $opd)>; - -def : Pat<(udiv i32:$dst, i32:$opd), (UDIVd32d32 $dst, $opd)>; - - // RI i8 -def : Pat<(sdiv i8:$dst, MximmSExt8:$opd), +def : Pat<(sdiv i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (SDIVd32i16 (MOVSXd32d8 $dst), (as_i16imm $opd)), MxSubRegIndex8Lo)>; -def : Pat<(udiv i8:$dst, MximmSExt8:$opd), +def : Pat<(udiv i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (UDIVd32i16 (MOVZXd32d8 $dst), (as_i16imm $opd)), MxSubRegIndex8Lo)>; -def : Pat<(srem i8:$dst, MximmSExt8:$opd), +def : Pat<(srem i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (ASR32di (ASR32di (SDIVd32i16 (MOVSXd32d8 $dst), (as_i16imm $opd)), 8), 8), MxSubRegIndex8Lo)>; -def : Pat<(urem i8:$dst, MximmSExt8:$opd), +def : Pat<(urem i8:$dst, Mxi8immSExt8:$opd), (EXTRACT_SUBREG (LSR32di (LSR32di (UDIVd32i16 (MOVZXd32d8 $dst), (as_i16imm $opd)), 8), 8), MxSubRegIndex8Lo)>; // RI i16 -def : Pat<(sdiv i16:$dst, MximmSExt16:$opd), +def : Pat<(sdiv i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (SDIVd32i16 (MOVSXd32d16 $dst), imm:$opd), MxSubRegIndex16Lo)>; -def : Pat<(udiv i16:$dst, MximmSExt16:$opd), +def : Pat<(udiv i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (UDIVd32i16 (MOVZXd32d16 $dst), imm:$opd), MxSubRegIndex16Lo)>; -def : Pat<(srem i16:$dst, MximmSExt16:$opd), +def : Pat<(srem i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (ASR32di (ASR32di (SDIVd32i16 (MOVSXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; -def : Pat<(urem i16:$dst, MximmSExt16:$opd), +def : Pat<(urem i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (LSR32di (LSR32di (UDIVd32i16 (MOVZXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; -defm MUL : MxDiMuOp<"mul", 0xC, 1>; +defm MUL : MxDiMuOp<"mul", 0xC>; -def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", 0x130, /*SIGNED*/true>; -def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", 0x130, /*SIGNED*/false>; +def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", MxSMul, 0x130, /*SIGNED*/true>; +def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", MxUMul, 0x130, /*SIGNED*/false>; // RR def : Pat<(mul i16:$dst, i16:$opd), @@ -760,17 +752,17 @@ def : Pat<(mul i32:$dst, i32:$opd), (SMULd32d32 $dst, $opd)>; // RI -def : Pat<(mul i16:$dst, MximmSExt16:$opd), +def : Pat<(mul i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (SMULd32i16 (MOVXd32d16 $dst), imm:$opd), MxSubRegIndex16Lo)>; -def : Pat<(mulhs i16:$dst, MximmSExt16:$opd), +def : Pat<(mulhs i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (ASR32di (ASR32di (SMULd32i16 (MOVXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; -def : Pat<(mulhu i16:$dst, MximmSExt16:$opd), +def : Pat<(mulhu i16:$dst, Mxi16immSExt16:$opd), (EXTRACT_SUBREG (LSR32di (LSR32di (UMULd32i16 (MOVXd32d16 $dst), imm:$opd), 8), 8), MxSubRegIndex16Lo)>; @@ -881,16 +873,16 @@ foreach N = ["add", "addc"] in { (ADD32df MxDRD32:$src, MxType32.FOp:$opd)>; // add reg, imm - def : Pat<(!cast<SDNode>(N) i8: $src, MximmSExt8:$opd), + def : Pat<(!cast<SDNode>(N) i8: $src, Mxi8immSExt8:$opd), (ADD8di MxDRD8 :$src, imm:$opd)>; - def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd), + def : Pat<(!cast<SDNode>(N) i16:$src, Mxi16immSExt16:$opd), (ADD16di MxDRD16:$src, imm:$opd)>; // LEAp is more complex and thus will be selected over normal ADD32ri but it cannot // be used with data registers, here by adding complexity to a simple ADD32ri insts // we make sure it will be selected over LEAp let AddedComplexity = 15 
in { - def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd), + def : Pat<(!cast<SDNode>(N) i32:$src, Mxi32immSExt32:$opd), (ADD32di MxDRD32:$src, imm:$opd)>; } // AddedComplexity = 15 @@ -949,11 +941,11 @@ foreach N = ["sub", "subc"] in { (SUB32df MxDRD32:$src, MxType32.FOp:$opd)>; // sub reg, imm - def : Pat<(!cast<SDNode>(N) i8 :$src, MximmSExt8 :$opd), + def : Pat<(!cast<SDNode>(N) i8 :$src, Mxi8immSExt8 :$opd), (SUB8di MxDRD8 :$src, imm:$opd)>; - def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd), + def : Pat<(!cast<SDNode>(N) i16:$src, Mxi16immSExt16:$opd), (SUB16di MxDRD16:$src, imm:$opd)>; - def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd), + def : Pat<(!cast<SDNode>(N) i32:$src, Mxi32immSExt32:$opd), (SUB32di MxDRD32:$src, imm:$opd)>; // sub imm, (An) @@ -982,11 +974,11 @@ multiclass BitwisePat<string INST, SDNode OP> { def : Pat<(OP i32:$src, i32:$opd), (!cast<MxInst>(INST#"32dd") MxDRD32:$src, MxDRD32:$opd)>; // op reg, imm - def : Pat<(OP i8: $src, MximmSExt8 :$opd), + def : Pat<(OP i8: $src, Mxi8immSExt8 :$opd), (!cast<MxInst>(INST#"8di") MxDRD8 :$src, imm:$opd)>; - def : Pat<(OP i16:$src, MximmSExt16:$opd), + def : Pat<(OP i16:$src, Mxi16immSExt16:$opd), (!cast<MxInst>(INST#"16di") MxDRD16:$src, imm:$opd)>; - def : Pat<(OP i32:$src, MximmSExt32:$opd), + def : Pat<(OP i32:$src, Mxi32immSExt32:$opd), (!cast<MxInst>(INST#"32di") MxDRD32:$src, imm:$opd)>; } diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td index 624093661d19..fa7e7aa0ed46 100644 --- a/llvm/lib/Target/M68k/M68kInstrData.td +++ b/llvm/lib/Target/M68k/M68kInstrData.td @@ -554,18 +554,21 @@ def: Pat<(MxSExtLoadi16i8 MxCP_ARID:$src), (EXTRACT_SUBREG (MOVSXd32p8 MxARID8:$src), MxSubRegIndex16Lo)>; def: Pat<(MxSExtLoadi16i8 MxCP_ARII:$src), (EXTRACT_SUBREG (MOVSXd32f8 MxARII8:$src), MxSubRegIndex16Lo)>; +def: Pat<(MxSExtLoadi16i8 MxCP_PCD:$src), (MOVSXd16q8 MxPCD8:$src)>; // i32 <- sext i8 def: Pat<(i32 (sext i8:$src)), (MOVSXd32d8 MxDRD8:$src)>; def: Pat<(MxSExtLoadi32i8 MxCP_ARI :$src), (MOVSXd32j8 MxARI8 :$src)>; def: Pat<(MxSExtLoadi32i8 MxCP_ARID:$src), (MOVSXd32p8 MxARID8:$src)>; def: Pat<(MxSExtLoadi32i8 MxCP_ARII:$src), (MOVSXd32f8 MxARII8:$src)>; +def: Pat<(MxSExtLoadi32i8 MxCP_PCD:$src), (MOVSXd32q8 MxPCD8:$src)>; // i32 <- sext i16 def: Pat<(i32 (sext i16:$src)), (MOVSXd32d16 MxDRD16:$src)>; def: Pat<(MxSExtLoadi32i16 MxCP_ARI :$src), (MOVSXd32j16 MxARI16 :$src)>; def: Pat<(MxSExtLoadi32i16 MxCP_ARID:$src), (MOVSXd32p16 MxARID16:$src)>; def: Pat<(MxSExtLoadi32i16 MxCP_ARII:$src), (MOVSXd32f16 MxARII16:$src)>; +def: Pat<(MxSExtLoadi32i16 MxCP_PCD:$src), (MOVSXd32q16 MxPCD16:$src)>; // i16 <- zext i8 def: Pat<(i16 (zext i8:$src)), diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td index 38d3127ac6a6..99bac7a59939 100644 --- a/llvm/lib/Target/M68k/M68kInstrFormats.td +++ b/llvm/lib/Target/M68k/M68kInstrFormats.td @@ -17,22 +17,22 @@ /// 03 M68000 (An) j address register indirect /// 04 M68000 (An)+ o address register indirect with postincrement /// 05 M68000 -(An) e address register indirect with predecrement -/// 06 M68000 (i,An) p address register indirect with displacement -/// 10 M68000 (i,An,Xn.L) f address register indirect with index and scale = 1 -/// 07 M68000 (i,An,Xn.W) F address register indirect with index and scale = 1 -/// 12 M68020 (i,An,Xn.L,SCALE) g address register indirect with index -/// 11 M68020 (i,An,Xn.W,SCALE) G address register indirect with index +/// 06 M68000 (d16,An) p address 
register indirect with displacement +/// 10 M68000 (d8,An,Xn.L) f address register indirect with index and scale = 1 +/// 07 M68000 (d8,An,Xn.W) F address register indirect with index and scale = 1 +/// 12 M68020 (d8,An,Xn.L,SCALE) g address register indirect with index +/// 11 M68020 (d8,An,Xn.W,SCALE) G address register indirect with index /// 14 M68020 ([bd,An],Xn.L,SCALE,od) u memory indirect postindexed mode /// 13 M68020 ([bd,An],Xn.W,SCALE,od) U memory indirect postindexed mode /// 16 M68020 ([bd,An,Xn.L,SCALE],od) v memory indirect preindexed mode /// 15 M68020 ([bd,An,Xn.W,SCALE],od) V memory indirect preindexed mode /// 20 M68000 abs.L b absolute long address /// 17 M68000 abs.W B absolute short address -/// 21 M68000 (i,PC) q program counter with displacement -/// 23 M68000 (i,PC,Xn.L) k program counter with index and scale = 1 -/// 22 M68000 (i,PC,Xn.W) K program counter with index and scale = 1 -/// 25 M68020 (i,PC,Xn.L,SCALE) l program counter with index -/// 24 M68020 (i,PC,Xn.W,SCALE) L program counter with index +/// 21 M68000 (d16,PC) q program counter with displacement +/// 23 M68000 (d8,PC,Xn.L) k program counter with index and scale = 1 +/// 22 M68000 (d8,PC,Xn.W) K program counter with index and scale = 1 +/// 25 M68020 (d8,PC,Xn.L,SCALE) l program counter with index +/// 24 M68020 (d8,PC,Xn.W,SCALE) L program counter with index /// 27 M68020 ([bd,PC],Xn.L,SCALE,od) x program counter memory indirect postindexed mode /// 26 M68020 ([bd,PC],Xn.W,SCALE,od) X program counter memory indirect postindexed mode /// 31 M68020 ([bd,PC,Xn.L,SCALE],od) y program counter memory indirect preindexed mode diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td index dc66e103361a..84eb8e56da76 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.td +++ b/llvm/lib/Target/M68k/M68kInstrInfo.td @@ -55,15 +55,6 @@ def MxSDT_BiArithCCRInOut : SDTypeProfile<2, 3, [ /* CCR */ SDTCisSameAs<1, 4> ]>; -// RES1, RES2, CCR <- op LHS, RHS -def MxSDT_2BiArithCCROut : SDTypeProfile<3, 2, [ - /* RES 1 */ SDTCisInt<0>, - /* RES 2 */ SDTCisSameAs<0, 1>, - /* CCR */ SDTCisVT<1, i8>, - /* LHS */ SDTCisSameAs<0, 2>, - /* RHS */ SDTCisSameAs<0, 3> -]>; - def MxSDT_CmpTest : SDTypeProfile<1, 2, [ /* CCR */ SDTCisVT<0, i8>, /* Ops */ SDTCisSameAs<1, 2> @@ -134,7 +125,7 @@ def MxAddX : SDNode<"M68kISD::ADDX", MxSDT_BiArithCCRInOut>; def MxSubX : SDNode<"M68kISD::SUBX", MxSDT_BiArithCCRInOut>; def MxSMul : SDNode<"M68kISD::SMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>; -def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_2BiArithCCROut, [SDNPCommutative]>; +def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>; def MxCmp : SDNode<"M68kISD::CMP", MxSDT_CmpTest>; def MxBtst : SDNode<"M68kISD::BTST", MxSDT_CmpTest>; @@ -522,9 +513,14 @@ def MxCP_PCI : ComplexPattern<iPTR, 2, "SelectPCI", // Pattern Fragments //===----------------------------------------------------------------------===// -def MximmSExt8 : PatLeaf<(i8 imm)>; -def MximmSExt16 : PatLeaf<(i16 imm)>; -def MximmSExt32 : PatLeaf<(i32 imm)>; +def Mxi8immSExt8 : PatLeaf<(i8 imm)>; +def MximmSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>; + +def Mxi16immSExt16 : PatLeaf<(i16 imm)>; +def MximmSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; + +def Mxi32immSExt32 : PatLeaf<(i32 imm)>; +def MximmSExt32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>; // Used for Shifts and Rotations, since M68k immediates in these instructions // are 1 <= i <= 8. 
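
// (Worked example of this constraint, as hypothetical scalar C: the
// srem/urem patterns in M68kInstrArithmetic.td above shift the packed
// DIVS.W/DIVU.W result right by 16 as two chained shifts of 8, because a
// single immediate shift cannot encode 16. DIVS.W leaves the 16-bit quotient
// in the low word and the 16-bit remainder in the high word of its 32-bit
// data register.)
#include <cstdint>
int16_t m68k_srem_i16(int16_t X, int16_t Y) {
  int32_t Dividend = X; // MOVSXd32d16: sign-extend before the 32/16 divide
  // DIVS.W result register: remainder in the high word, quotient in the low.
  int32_t Packed =
      (int32_t)(((uint32_t)(uint16_t)(Dividend % Y) << 16) |
                (uint16_t)(Dividend / Y));
  // (ASR32di (ASR32di x, 8), 8): two arithmetic shifts of 8 stand in for 16.
  return (int16_t)((Packed >> 8) >> 8);
}
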
Generally, if immediate is bigger than 8 it will be moved @@ -717,7 +713,7 @@ foreach size = [8, 16, 32] in { // #imm def MxOp#size#AddrMode_i : MxImmOpBundle<size, !cast<MxOperand>("Mxi"#size#"imm"), - !cast<PatFrag>("MximmSExt"#size)>; + !cast<PatFrag>("Mxi"#size#"immSExt"#size)>; } // foreach size = [8, 16, 32] foreach size = [16, 32] in { @@ -747,7 +743,7 @@ class MxType8Class<string rLet, MxOperand reg> MxAL8, MxCP_AL, MxPCD8, MxCP_PCD, MxPCI8, MxCP_PCI, - Mxi8imm, MximmSExt8, + Mxi8imm, Mxi8immSExt8, Mxloadi8>; def MxType8 : MxType8Class<?,?>; @@ -762,7 +758,7 @@ class MxType16Class<string rLet, MxOperand reg> MxAL16, MxCP_AL, MxPCD16, MxCP_PCD, MxPCI16, MxCP_PCI, - Mxi16imm, MximmSExt16, + Mxi16imm, Mxi16immSExt16, Mxloadi16>; def MxType16 : MxType16Class<?,?>; @@ -777,7 +773,7 @@ class MxType32Class<string rLet, MxOperand reg> MxAL32, MxCP_AL, MxPCD32, MxCP_PCD, MxPCI32, MxCP_PCI, - Mxi32imm, MximmSExt32, + Mxi32imm, Mxi32immSExt32, Mxloadi32>; def MxType32 : MxType32Class<?,?>; diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index ee7762c296bf..d3b59138a5a9 100644 --- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -964,7 +964,7 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, if (!isa<ConstantSDNode>(N->getOperand(1))) return Op; - uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + uint64_t ShiftAmount = N->getConstantOperandVal(1); // Expand the stuff into sequence of shifts. SDValue Victim = N->getOperand(0); @@ -1269,7 +1269,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDLoc dl(Op); EVT PtrVT = Op.getValueType(); @@ -1295,7 +1295,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::R4, VT); while (Depth--) diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td index ac679c4c01bc..c0e7eef8dd9d 100644 --- a/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -164,20 +164,20 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>, GPR_64; /// Shift Instructions let AdditionalPredicates = [NotInMicroMips] in { - def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl, + def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, mshl_64, immZExt6>, SRA_FM<0x38, 0>, ISA_MIPS3; - def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl, + def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, msrl_64, immZExt6>, SRA_FM<0x3a, 0>, ISA_MIPS3; - def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra, + def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, msra_64, immZExt6>, SRA_FM<0x3b, 0>, ISA_MIPS3; - def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>, + def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, mshl_64>, SRLV_FM<0x14, 0>, ISA_MIPS3; - def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>, + def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, msra_64>, SRLV_FM<0x17, 0>, 
ISA_MIPS3; - def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>, + def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, msrl_64>, SRLV_FM<0x16, 0>, ISA_MIPS3; def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>, SRA_FM<0x3c, 0>, ISA_MIPS3; diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 77ce8ba890a8..01b41f3b2159 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/CFG.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -324,6 +326,24 @@ bool MipsDAGToDAGISel::SelectInlineAsmMemoryOperand( return true; } +bool MipsDAGToDAGISel::isUnneededShiftMask(SDNode *N, + unsigned ShAmtBits) const { + assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); + + const APInt &RHS = N->getConstantOperandAPInt(1); + if (RHS.countr_one() >= ShAmtBits) { + LLVM_DEBUG( + dbgs() + << DEBUG_TYPE + << " Need optimize 'and & shl/srl/sra' and operand value bits is " + << RHS.countr_one() << "\n"); + return true; + } + + KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); + return (Known.Zero | RHS).countr_one() >= ShAmtBits; +} + char MipsDAGToDAGISel::ID = 0; INITIALIZE_PASS(MipsDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h index e41cb08712ca..52207d0f6284 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h @@ -143,6 +143,7 @@ private: bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector<SDValue> &OutOps) override; + bool isUnneededShiftMask(SDNode *N, unsigned ShAmtBits) const; }; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index a0cab8024386..483eba4e4f47 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -2508,7 +2508,7 @@ SDValue MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const { SDValue MipsTargetLowering:: lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // check the depth - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) { + if (Op.getConstantOperandVal(0) != 0) { DAG.getContext()->emitError( "return address can be determined only for current frame"); return SDValue(); @@ -2529,7 +2529,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op, return SDValue(); // check the depth - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) { + if (Op.getConstantOperandVal(0) != 0) { DAG.getContext()->emitError( "return address can be determined only for current frame"); return SDValue(); diff --git a/llvm/lib/Target/Mips/MipsInstrCompiler.td b/llvm/lib/Target/Mips/MipsInstrCompiler.td new file mode 100644 index 000000000000..8ae3d71978b1 --- /dev/null +++ b/llvm/lib/Target/Mips/MipsInstrCompiler.td @@ -0,0 +1,33 @@ +//===- MipsInstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===// +// +// Part 
of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the various pseudo instructions used by the compiler, +// as well as Pat patterns used during instruction selection. +// +//===----------------------------------------------------------------------===// + + +def shiftMask_32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{ + return isUnneededShiftMask(N, 5); +}]>; + +def shiftMask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm), [{ + return isUnneededShiftMask(N, 6); +}]>; + +foreach width = [32, 64] in { +defvar shiftMask = !cast<SDPatternOperator>("shiftMask_"#width); +def mshl_#width : PatFrags<(ops node:$src0, node:$src1), + [(shl node:$src0, node:$src1), (shl node:$src0, (shiftMask node:$src1))]>; + +def msrl_#width : PatFrags<(ops node:$src0, node:$src1), + [(srl node:$src0, node:$src1), (srl node:$src0, (shiftMask node:$src1))]>; + +def msra_#width : PatFrags<(ops node:$src0, node:$src1), + [(sra node:$src0, node:$src1), (sra node:$src0, (shiftMask node:$src1))]>; +} diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index 75270857ea13..4b6f4b22e71b 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// // Mips profiles and nodes //===----------------------------------------------------------------------===// +include "MipsInstrCompiler.td" def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, @@ -2079,17 +2080,17 @@ let AdditionalPredicates = [NotInMicroMips] in { let AdditionalPredicates = [NotInMicroMips] in { /// Shift Instructions - def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, shl, + def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL, mshl_32, immZExt5>, SRA_FM<0, 0>, ISA_MIPS1; - def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, srl, + def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, II_SRL, msrl_32, immZExt5>, SRA_FM<2, 0>, ISA_MIPS1; - def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, sra, + def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, II_SRA, msra_32, immZExt5>, SRA_FM<3, 0>, ISA_MIPS1; - def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, shl>, + def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, II_SLLV, mshl_32>, SRLV_FM<4, 0>, ISA_MIPS1; - def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, srl>, + def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, II_SRLV, msrl_32>, SRLV_FM<6, 0>, ISA_MIPS1; - def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, sra>, + def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, msra_32>, SRLV_FM<7, 0>, ISA_MIPS1; // Rotate Instructions diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 8c865afd4207..0ed87ee0809a 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -831,8 +831,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::INTRINSIC_W_CHAIN: { - const unsigned IntrinsicOpcode = - cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const unsigned 
IntrinsicOpcode = Node->getConstantOperandVal(1); switch (IntrinsicOpcode) { default: break; @@ -885,7 +884,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue()) { + switch (Node->getConstantOperandVal(0)) { default: break; @@ -901,8 +900,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } case ISD::INTRINSIC_VOID: { - const unsigned IntrinsicOpcode = - cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const unsigned IntrinsicOpcode = Node->getConstantOperandVal(1); switch (IntrinsicOpcode) { default: break; diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 5c34067c8888..e9788fa7ed73 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1519,7 +1519,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); EVT ResTy = Op->getValueType(0); APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) - << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue(); + << Op->getConstantOperandAPInt(2); SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); @@ -1528,7 +1528,7 @@ static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); + unsigned Intrinsic = Op->getConstantOperandVal(0); switch (Intrinsic) { default: return SDValue(); @@ -2300,7 +2300,7 @@ static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); + unsigned Intr = Op->getConstantOperandVal(1); switch (Intr) { default: return SDValue(); @@ -2375,7 +2375,7 @@ static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { - unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); + unsigned Intr = Op->getConstantOperandVal(1); switch (Intr) { default: return SDValue(); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 894a8636f458..815c46edb6fa 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -513,7 +513,7 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) { } bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { - unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(1); switch (IID) { default: return false; @@ -730,7 +730,7 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, } bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); switch (IID) { default: return false; @@ -1246,7 +1246,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { Op1 = N->getOperand(2); Mem = cast<MemIntrinsicSDNode>(N); - unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(1); switch (IID) { default: return false; diff 
--git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b57d185bb638..ed96339240d9 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4902,8 +4902,7 @@ bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) { return false; if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN || - cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() != - Intrinsic::loop_decrement) + LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement) return false; if (!isa<ConstantSDNode>(RHS)) @@ -6011,7 +6010,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Op #3 is the Dest MBB // Op #4 is the Flag. // Prevent PPC::PRED_* from being selected into LI. - unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned PCC = N->getConstantOperandVal(1); if (EnableBranchHint) PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ae0d3b76f89a..8f27e6677afa 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1176,6 +1176,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTruncStoreAction(MVT::f128, MVT::f32, Expand); // No implementation for these ops for PowerPC. + setOperationAction(ISD::FSINCOS, MVT::f128, Expand); setOperationAction(ISD::FSIN, MVT::f128, Expand); setOperationAction(ISD::FCOS, MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); @@ -1411,6 +1412,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::EXP2_F128, "exp2f128"); setLibcallName(RTLIB::SIN_F128, "sinf128"); setLibcallName(RTLIB::COS_F128, "cosf128"); + setLibcallName(RTLIB::SINCOS_F128, "sincosf128"); setLibcallName(RTLIB::POW_F128, "powf128"); setLibcallName(RTLIB::FMIN_F128, "fminf128"); setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); @@ -2815,8 +2817,8 @@ bool PPCTargetLowering::SelectAddressRegImm( return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). - assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() - && "Cannot handle constant offsets yet!"); + assert(!N.getOperand(1).getConstantOperandVal(1) && + "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || @@ -3822,8 +3824,7 @@ SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { // Check all operands that may contain the LR. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - const InlineAsm::Flag Flags( - cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue()); + const InlineAsm::Flag Flags(Op.getConstantOperandVal(i)); unsigned NumVals = Flags.getNumOperandRegisters(); ++i; // Skip the ID value. @@ -10440,8 +10441,7 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, /// information about the intrinsic. 
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget) { - unsigned IntrinsicID = - cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Intrin.getConstantOperandVal(0); CompareOpc = -1; isDot = false; switch (IntrinsicID) { @@ -10726,8 +10726,7 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntrinsicID = Op.getConstantOperandVal(0); SDLoc dl(Op); @@ -10945,7 +10944,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Unpack the result based on how the target uses it. unsigned BitNo; // Bit # of CR6. bool InvertBit; // Invert result? - switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { + switch (Op.getConstantOperandVal(1)) { default: // Can't happen, don't crash on invalid number though. case 0: // Return the value of the EQ bit of CR6. BitNo = 0; InvertBit = false; @@ -10981,7 +10980,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, // the beginning of the argument list. int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1; SDLoc DL(Op); - switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) { + switch (Op.getConstantOperandVal(ArgStart)) { case Intrinsic::ppc_cfence: { assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); SDValue Val = Op.getOperand(ArgStart + 1); @@ -11546,7 +11545,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return SDValue(); // Custom lower is only done for high or low doubleword. 
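The recurring rewrite in these hunks replaces the verbose cast<ConstantSDNode>(...)->getZExtValue() idiom with the getConstantOperandVal / getConstantOperandAPInt accessors that SDNode and SDValue already provide; the -/+ pair just below is one more instance. A minimal sketch of what the accessors amount to (simplified for illustration; the real declarations live in llvm/include/llvm/CodeGen/SelectionDAGNodes.h):

// Simplified sketch, not the verbatim LLVM implementation. Both forms assert
// via cast<> that operand Num really is a ConstantSDNode, so the rewrite
// removes boilerplate without changing behavior.
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
  return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
}
const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
  return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
}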
- int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue(); + int Idx = Op0.getConstantOperandVal(1); if (Idx % 2 != 0) return SDValue(); @@ -11715,8 +11714,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, break; } case ISD::INTRINSIC_W_CHAIN: { - if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != - Intrinsic::loop_decrement) + if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement) break; assert(N->getValueType(0) == MVT::i1 && @@ -11732,7 +11730,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, break; } case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) { + switch (N->getConstantOperandVal(0)) { case Intrinsic::ppc_pack_longdouble: Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, N->getOperand(2), N->getOperand(1))); @@ -13652,7 +13650,7 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { EVT VT; - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { default: return false; case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: @@ -13680,7 +13678,7 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, if (N->getOpcode() == ISD::INTRINSIC_VOID) { EVT VT; - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { default: return false; case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: @@ -15544,8 +15542,7 @@ SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, } static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) { - unsigned IntrinsicID = - cast<ConstantSDNode>(Intrin.getOperand(1))->getZExtValue(); + unsigned IntrinsicID = Intrin.getConstantOperandVal(1); if (IntrinsicID == Intrinsic::ppc_stdcx) StoreWidth = 8; else if (IntrinsicID == Intrinsic::ppc_stwcx) @@ -15977,7 +15974,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); - unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IID = N->getConstantOperandVal(0); Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { @@ -15990,36 +15987,34 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, .zext(Add.getScalarValueSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode *U : BasePtr->uses()) { - if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) { - // We've found another LVSL/LVSR, and this address is an aligned - // multiple of that one. The results will be the same, so use the - // one we've just found instead. + if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN && + U->getConstantOperandVal(0) == IID) { + // We've found another LVSL/LVSR, and this address is an aligned + // multiple of that one. The results will be the same, so use the + // one we've just found instead. 
- return SDValue(U, 0); - } + return SDValue(U, 0); + } } } if (isa<ConstantSDNode>(Add->getOperand(1))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode *U : BasePtr->uses()) { - if (U->getOpcode() == ISD::ADD && - isa<ConstantSDNode>(U->getOperand(1)) && - (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() - - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) % - (1ULL << Bits) == - 0) { - SDNode *OtherAdd = U; - for (SDNode *V : OtherAdd->uses()) { - if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() == - IID) { - return SDValue(V, 0); - } + if (U->getOpcode() == ISD::ADD && + isa<ConstantSDNode>(U->getOperand(1)) && + (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) % + (1ULL << Bits) == + 0) { + SDNode *OtherAdd = U; + for (SDNode *V : OtherAdd->uses()) { + if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN && + V->getConstantOperandVal(0) == IID) { + return SDValue(V, 0); } } } + } } } @@ -16059,30 +16054,30 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::INTRINSIC_W_CHAIN: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { - default: - break; - case Intrinsic::ppc_altivec_vsum4sbs: - case Intrinsic::ppc_altivec_vsum4shs: - case Intrinsic::ppc_altivec_vsum4ubs: { - // These sum-across intrinsics only have a chain due to the side effect - // that they may set the SAT bit. If we know the SAT bit will not be set - // for some inputs, we can replace any uses of their chain with the input - // chain. - if (BuildVectorSDNode *BVN = - dyn_cast<BuildVectorSDNode>(N->getOperand(3))) { - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - bool BVNIsConstantSplat = BVN->isConstantSplat( - APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, - !Subtarget.isLittleEndian()); - // If the constant splat vector is 0, the SAT bit will not be set. - if (BVNIsConstantSplat && APSplatBits == 0) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0)); + switch (N->getConstantOperandVal(1)) { + default: + break; + case Intrinsic::ppc_altivec_vsum4sbs: + case Intrinsic::ppc_altivec_vsum4shs: + case Intrinsic::ppc_altivec_vsum4ubs: { + // These sum-across intrinsics only have a chain due to the side effect + // that they may set the SAT bit. If we know the SAT bit will not be set + // for some inputs, we can replace any uses of their chain with the + // input chain. + if (BuildVectorSDNode *BVN = + dyn_cast<BuildVectorSDNode>(N->getOperand(3))) { + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool BVNIsConstantSplat = BVN->isConstantSplat( + APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, + !Subtarget.isLittleEndian()); + // If the constant splat vector is 0, the SAT bit will not be set. + if (BVNIsConstantSplat && APSplatBits == 0) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0)); + } + return SDValue(); } - return SDValue(); - } case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: // For little endian, VSX loads require generating lxvd2x/xxswapd. @@ -16096,7 +16091,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // For little endian, VSX stores require generating xxswapd/stxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
if (Subtarget.needsSwapsForVSXMemOps()) { - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { default: break; case Intrinsic::ppc_vsx_stxvw4x: @@ -16325,7 +16320,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; - switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { + switch (LHS.getConstantOperandVal(1)) { default: // Can't happen, don't crash on invalid number though. case 0: // Branch on the value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; @@ -16404,7 +16399,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; } case ISD::INTRINSIC_WO_CHAIN: { - switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { + switch (Op.getConstantOperandVal(0)) { default: break; case Intrinsic::ppc_altivec_vcmpbfp_p: case Intrinsic::ppc_altivec_vcmpeqfp_p: @@ -16431,7 +16426,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; } case ISD::INTRINSIC_W_CHAIN: { - switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { + switch (Op.getConstantOperandVal(1)) { default: break; case Intrinsic::ppc_load2r: @@ -16866,7 +16861,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, return SDValue(); SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); // Make sure the function does not optimize away the store of the RA to // the stack. @@ -16899,7 +16894,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -18084,8 +18079,7 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates. else FlagSet |= PPC::MOF_RPlusR; // Register. - } else if (RHS.getOpcode() == PPCISD::Lo && - !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue()) + } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1)) FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo. else FlagSet |= PPC::MOF_RPlusR; @@ -18129,7 +18123,7 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N, unsigned ParentOp = Parent->getOpcode(); if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) || (ParentOp == ISD::INTRINSIC_VOID))) { - unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue(); + unsigned ID = Parent->getConstantOperandVal(1); if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) { SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? Parent->getOperand(2) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index f3ea0f597eec..4759aa951664 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1832,57 +1832,18 @@ ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) { if (getParser().parseIdentifier(Identifier)) return ParseStatus::Failure; - // Check for CSR names conflicts. - // Custom CSR names might conflict with CSR names in privileged spec. - // E.g. 
- SiFive mnscratch(0x350) and privileged spec mnscratch(0x740). - auto CheckCSRNameConflict = [&]() { - if (!(RISCVSysReg::lookupSysRegByName(Identifier))) { - Error(S, "system register use requires an option to be enabled"); - return true; - } - return false; - }; - - // First check for vendor specific CSRs. - auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByName(Identifier); - if (SiFiveReg) { - if (SiFiveReg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) { - Operands.push_back( - RISCVOperand::createSysReg(Identifier, S, SiFiveReg->Encoding)); - return ParseStatus::Success; - } - if (CheckCSRNameConflict()) - return ParseStatus::Failure; - } - auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier); if (!SysReg) + SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier); + if (!SysReg) if ((SysReg = RISCVSysReg::lookupSysRegByDeprecatedName(Identifier))) Warning(S, "'" + Identifier + "' is a deprecated alias for '" + SysReg->Name + "'"); - // Check for CSR encoding conflicts. - // Custom CSR encoding might conflict with CSR encoding in privileged spec. - // E.g. - SiFive mnscratch(0x350) and privileged spec miselect(0x350). - auto CheckCSREncodingConflict = [&]() { - auto Reg = RISCVSysReg::lookupSiFiveRegByEncoding(SysReg->Encoding); - if (Reg && Reg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) { - Warning(S, "'" + Identifier + "' CSR is not available on the current " + - "subtarget. Instead '" + Reg->Name + - "' CSR will be used."); - Operands.push_back( - RISCVOperand::createSysReg(Reg->Name, S, Reg->Encoding)); - return true; - } - return false; - }; - - // Accept a named SysReg if the required features are present. + // Accept a named Sys Reg if the required features are present. if (SysReg) { if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) return Error(S, "system register use requires an option to be enabled"); - if (CheckCSREncodingConflict()) - return ParseStatus::Success; Operands.push_back( RISCVOperand::createSysReg(Identifier, S, SysReg->Encoding)); return ParseStatus::Success; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 53e2b6b4d94e..ed80da14c795 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -74,6 +74,17 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + MCRegister Reg = RISCV::X0 + RegNo; + if (Reg != RISCV::X1 && Reg != RISCV::X5) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { @@ -359,6 +370,10 @@ static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address, static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder); +static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, + uint64_t Address, + const MCDisassembler *Decoder); + #include "RISCVGenDisassemblerTables.inc" static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, @@ -373,6 +388,16 @@ static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } +static DecodeStatus 
decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, + uint64_t Address, + const MCDisassembler *Decoder) { + uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5); + DecodeStatus Result = DecodeGPRX1X5RegisterClass(Inst, Rs1, Address, Decoder); + (void)Result; + assert(Result == MCDisassembler::Success && "Invalid register"); + return MCDisassembler::Success; +} + static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, uint32_t Insn, uint64_t Address, const MCDisassembler *Decoder) { @@ -462,10 +487,8 @@ static DecodeStatus decodeRegReg(MCInst &Inst, uint32_t Insn, uint64_t Address, return MCDisassembler::Success; } -// spimm is based on rlist now. static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - // TODO: check if spimm matches rlist Inst.addOperand(MCOperand::createImm(Imm)); return MCDisassembler::Success; } @@ -568,8 +591,6 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_FEATURE( RISCV::FeatureVendorXSfvfnrclipxfqf, DecoderTableXSfvfnrclipxfqf32, "SiFive FP32-to-int8 Ranged Clip Instructions opcode table"); - TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32, - "Sifive CIE custom opcode table"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip, DecoderTableXCVbitmanip32, "CORE-V Bit Manipulation custom opcode table"); @@ -600,6 +621,8 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, TRY_TO_DECODE_AND_ADD_SP(!STI.hasFeature(RISCV::Feature64Bit), DecoderTableRISCV32Only_16, "RISCV32Only_16 table (16-bit Instruction)"); + TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZicfiss, DecoderTableZicfiss16, + "RVZicfiss table (Shadow Stack)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmt, DecoderTableRVZcmt16, "Zcmt table (16-bit Table Jump Instructions)"); TRY_TO_DECODE_FEATURE( diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 28ec999157c6..079906d1958c 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -101,7 +101,7 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR}); - if (ST.hasStdExtZbb()) { + if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) { RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}}); // Widen s32 rotate amount to s64 so SDAG patterns will match. 
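Zbkb defines the same rol/ror/rori instructions as Zbb, which is why the legalizer hunk above now accepts either extension before marking G_ROTL/G_ROTR legal. When neither is present, .lower() expands a rotate into shifts plus an OR; a scalar C++ analogy of that expansion (rotl32 is an illustrative name, not LLVM code):

#include <cstdint>

// Rotate-left via shifts and OR, with the amount masked to the low 5 bits as
// on RISC-V; the S == 0 case is split out to avoid the undefined shift by 32.
static uint32_t rotl32(uint32_t X, unsigned S) {
  S &= 31;
  return S ? (X << S) | (X >> (32 - S)) : X;
}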
if (ST.is64Bit()) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 66a46a485f53..74d0db545e55 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -27,7 +27,6 @@ extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures]; namespace RISCVSysReg { #define GET_SysRegsList_IMPL -#define GET_SiFiveRegsList_IMPL #include "RISCVGenSearchableTables.inc" } // namespace RISCVSysReg diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 30ed36525e29..c32210fc1419 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -401,6 +401,7 @@ int getLoadFPImm(APFloat FPImm); namespace RISCVSysReg { struct SysReg { const char *Name; + const char *AltName; const char *DeprecatedName; unsigned Encoding; // FIXME: add these additional fields when needed. @@ -424,22 +425,9 @@ struct SysReg { return true; return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; } - - bool haveVendorRequiredFeatures(const FeatureBitset &ActiveFeatures) const { - // Not in 32-bit mode. - if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit]) - return false; - // No required feature associated with the system register. - if (FeaturesRequired.none()) - return false; - return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; - } }; -struct SiFiveReg : SysReg {}; - #define GET_SysRegsList_DECL -#define GET_SiFiveRegsList_DECL #include "RISCVGenSearchableTables.inc" } // end namespace RISCVSysReg diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 195dda0b8b14..bd899495812f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -121,11 +121,8 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); - auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByEncoding(Imm); auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm); - if (SiFiveReg && SiFiveReg->haveVendorRequiredFeatures(STI.getFeatureBits())) - markup(O, Markup::Register) << SiFiveReg->Name; - else if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits())) + if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits())) markup(O, Markup::Register) << SysReg->Name; else markup(O, Markup::Register) << formatImm(Imm); diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td index 130a6ecc143d..3dd0b3723828 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -14,7 +14,7 @@ // RISCVISelLowering.cpp (CC_RISCV). def CSR_ILP32_LP64 - : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; + : CalleeSavedRegs<(add X1, X8, X9, (sequence "X%u", 18, 27))>; def CSR_ILP32F_LP64F : CalleeSavedRegs<(add CSR_ILP32_LP64, @@ -29,7 +29,7 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; // Interrupt handler needs to save/restore all registers that are used, // both Caller and Callee saved registers. -def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 3, 31))>; +def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 5, 31))>; // Same as CSR_Interrupt, but including all 32-bit FP registers. 
def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index a66dd135ae5f..59b202606dad 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -687,6 +687,28 @@ def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">, AssemblerPredicate<(all_of FeatureStdExtZicond), "'Zicond' (Integer Conditional Operations)">; +def FeatureStdExtZimop : SubtargetFeature<"experimental-zimop", "HasStdExtZimop", "true", + "'Zimop' (May-Be-Operations)">; +def HasStdExtZimop : Predicate<"Subtarget->hasStdExtZimop()">, + AssemblerPredicate<(all_of FeatureStdExtZimop), + "'Zimop' (May-Be-Operations)">; + +def FeatureStdExtZcmop : SubtargetFeature<"experimental-zcmop", "HasStdExtZcmop", "true", + "'Zcmop' (Compressed May-Be-Operations)", + [FeatureStdExtZca]>; +def HasStdExtZcmop : Predicate<"Subtarget->hasStdExtZcmop()">, + AssemblerPredicate<(all_of FeatureStdExtZcmop), + "'Zcmop' (Compressed May-Be-Operations)">; + +def FeatureStdExtZicfiss + : SubtargetFeature<"experimental-zicfiss", "HasStdExtZicfiss", "true", + "'Zicfiss' (Shadow stack)", + [FeatureStdExtZicsr, FeatureStdExtZimop]>; +def HasStdExtZicfiss : Predicate<"Subtarget->hasStdExtZicfiss()">, + AssemblerPredicate<(all_of FeatureStdExtZicfiss), + "'Zicfiss' (Shadow stack)">; +def NoHasStdExtZicfiss : Predicate<"!Subtarget->hasStdExtZicfiss()">; + def FeatureStdExtSmaia : SubtargetFeature<"smaia", "HasStdExtSmaia", "true", "'Smaia' (Smaia encompasses all added CSRs and all " @@ -813,13 +835,6 @@ def HasVendorXSfvcp : Predicate<"Subtarget->hasVendorXSfvcp()">, AssemblerPredicate<(all_of FeatureVendorXSfvcp), "'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions)">; -def FeatureVendorXSfcie - : SubtargetFeature<"xsfcie", "HasVendorXSfcie", "true", - "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">; -def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">, - AssemblerPredicate<(all_of FeatureVendorXSfcie), - "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">; - def FeatureVendorXSfvqmaccdod : SubtargetFeature<"xsfvqmaccdod", "HasVendorXSfvqmaccdod", "true", "'XSfvqmaccdod' (SiFive Int8 Matrix Multiplication Instructions (2-by-8 and 8-by-2))", diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 098a320c9153..bfa3bf3cc74e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1360,7 +1360,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) break; - uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + uint64_t C2 = N0.getConstantOperandVal(1); // Constant should be a mask. if (!isMask_64(C2)) @@ -1604,7 +1604,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; } case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { // By default we do not custom select any intrinsic. 
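The experimental Zimop extension introduced above defines "May-Be-Operations": encodings reserved so that later extensions can overlay real semantics on them (Zicfiss does exactly that, and hence depends on Zimop in the feature definition). Per the specification, an unredefined MOP simply writes zero to its destination. A scalar model of that baseline behavior (mop_r/mop_rr are illustrative names, not LLVM code):

#include <cstdint>

// Baseline Zimop semantics: source registers are read but ignored, and the
// destination receives zero, leaving the encoding free to be redefined.
static uint64_t mop_r(uint64_t /*Rs1*/) { return 0; }
static uint64_t mop_rr(uint64_t /*Rs1*/, uint64_t /*Rs2*/) { return 0; }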
default: @@ -1825,7 +1825,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; } case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + unsigned IntNo = Node->getConstantOperandVal(1); switch (IntNo) { case Intrinsic::riscv_vsseg2: case Intrinsic::riscv_vsseg3: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c2508a158837..03a59f8a8b57 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1374,8 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, - ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, - ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, + ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -7235,7 +7235,7 @@ SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); while (Depth--) { int Offset = -(XLenInBytes * 2); SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, @@ -7260,7 +7260,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); if (Depth) { int Off = -XLenInBytes; SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); @@ -11731,7 +11731,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, break; } case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntNo = N->getConstantOperandVal(0); switch (IntNo) { default: llvm_unreachable( @@ -12850,9 +12850,9 @@ struct CombineResult; /// Helper class for folding sign/zero extensions. /// In particular, this class is used for the following combines: -/// add_vl -> vwadd(u) | vwadd(u)_w -/// sub_vl -> vwsub(u) | vwsub(u)_w -/// mul_vl -> vwmul(u) | vwmul_su +/// add | add_vl -> vwadd(u) | vwadd(u)_w +/// sub | sub_vl -> vwsub(u) | vwsub(u)_w +/// mul | mul_vl -> vwmul(u) | vwmul_su /// /// An object of this class represents an operand of the operation we want to /// combine. @@ -12897,6 +12897,8 @@ struct NodeExtensionHelper { /// E.g., for zext(a), this would return a. SDValue getSource() const { switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return OrigOperand.getOperand(0); @@ -12913,7 +12915,8 @@ struct NodeExtensionHelper { /// Get or create a value that can feed \p Root with the given extension \p /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. /// \see ::getSource(). - SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, + SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, std::optional<bool> SExt) const { if (!SExt.has_value()) return OrigOperand; @@ -12928,8 +12931,10 @@ struct NodeExtensionHelper { // If we need an extension, we should be changing the type. 
SDLoc DL(Root); - auto [Mask, VL] = getMaskAndVL(Root); + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); @@ -12969,12 +12974,15 @@ struct NodeExtensionHelper { /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; + case ISD::MUL: case RISCVISD::MUL_VL: return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -12987,7 +12995,8 @@ struct NodeExtensionHelper { /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> /// newOpcode(a, b). static unsigned getSUOpcode(unsigned Opcode) { - assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); + assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && + "SU is only supported for MUL"); return RISCVISD::VWMULSU_VL; } @@ -12995,8 +13004,10 @@ struct NodeExtensionHelper { /// newOpcode(a, b). static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; + case ISD::SUB: case RISCVISD::SUB_VL: return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; default: @@ -13006,19 +13017,33 @@ struct NodeExtensionHelper { using CombineToTry = std::function<std::optional<CombineResult>( SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, - const NodeExtensionHelper & /*RHS*/)>; + const NodeExtensionHelper & /*RHS*/, SelectionDAG &, + const RISCVSubtarget &)>; /// Check if this node needs to be fully folded or extended for all users. bool needToPromoteOtherUsers() const { return EnforceOneUse; } /// Helper method to set the various fields of this struct based on the /// type of \p Root. - void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { + void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SupportsZExt = false; SupportsSExt = false; EnforceOneUse = true; CheckMask = true; - switch (OrigOperand.getOpcode()) { + unsigned Opc = OrigOperand.getOpcode(); + switch (Opc) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: { + if (OrigOperand.getValueType().isVector()) { + SupportsZExt = Opc == ISD::ZERO_EXTEND; + SupportsSExt = Opc == ISD::SIGN_EXTEND; + SDLoc DL(Root); + MVT VT = Root->getSimpleValueType(0); + std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + } + break; + } case RISCVISD::VZEXT_VL: SupportsZExt = true; Mask = OrigOperand.getOperand(1); @@ -13074,8 +13099,16 @@ struct NodeExtensionHelper { } /// Check if \p Root supports any extension folding combines. 
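What the vwadd(u)/vwsub(u)/vwmul(u) folds in this helper buy: rather than extending both operands to 2*SEW and operating at full width, the widening instruction consumes the narrow sources directly and produces the wide result in one step. A scalar analogy (illustrative function names; the real combine rewrites scalable-vector DAG nodes):

#include <cstdint>

// sext + add fused into one widening op, as in vwadd.vv.
static int32_t vwadd_vv(int16_t A, int16_t B) {
  return static_cast<int32_t>(A) + static_cast<int32_t>(B);
}
// zext + add, as in vwaddu.vv.
static uint32_t vwaddu_vv(uint16_t A, uint16_t B) {
  return static_cast<uint32_t>(A) + static_cast<uint32_t>(B);
}
// sext * zext, the mixed-sign vwmulsu case that getSUOpcode above selects.
static int32_t vwmulsu_vv(int16_t A, uint16_t B) {
  return static_cast<int32_t>(A) * static_cast<int32_t>(B);
}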
- static bool isSupportedRoot(const SDNode *Root) { + static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(Root->getValueType(0))) + return false; + return Root->getValueType(0).isScalableVector(); + } case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: @@ -13090,9 +13123,10 @@ struct NodeExtensionHelper { } /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). - NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { - assert(isSupportedRoot(Root) && "Trying to build an helper with an " - "unsupported root"); + NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " + "unsupported root"); assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); OrigOperand = Root->getOperand(OperandIdx); @@ -13108,7 +13142,7 @@ struct NodeExtensionHelper { SupportsZExt = Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; SupportsSExt = !SupportsZExt; - std::tie(Mask, VL) = getMaskAndVL(Root); + std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); CheckMask = true; // There's no existing extension here, so we don't have to worry about // making sure it gets removed. @@ -13117,7 +13151,7 @@ struct NodeExtensionHelper { } [[fallthrough]]; default: - fillUpExtensionSupport(Root, DAG); + fillUpExtensionSupport(Root, DAG, Subtarget); break; } } @@ -13133,14 +13167,27 @@ struct NodeExtensionHelper { } /// Helper function to get the Mask and VL from \p Root. - static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { - assert(isSupportedRoot(Root) && "Unexpected root"); - return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + static std::pair<SDValue, SDValue> + getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Unexpected root"); + switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + SDLoc DL(Root); + MVT VT = Root->getSimpleValueType(0); + return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + } + default: + return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + } } /// Check if the Mask and VL of this operand are compatible with \p Root. - bool areVLAndMaskCompatible(const SDNode *Root) const { - auto [Mask, VL] = getMaskAndVL(Root); + bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); return isMaskCompatible(Mask) && isVLCompatible(VL); } @@ -13148,11 +13195,14 @@ struct NodeExtensionHelper { /// foldings that are supported by this class. static bool isCommutative(const SDNode *N) { switch (N->getOpcode()) { + case ISD::ADD: + case ISD::MUL: case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return true; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13197,14 +13247,25 @@ struct CombineResult { /// Return a value that uses TargetOpcode and that can be used to replace /// Root. /// The actual replacement is *not* done in that method. 
- SDValue materialize(SelectionDAG &DAG) const { + SDValue materialize(SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { SDValue Mask, VL, Merge; - std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); - Merge = Root->getOperand(2); + std::tie(Mask, VL) = + NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); + switch (Root->getOpcode()) { + default: + Merge = Root->getOperand(2); + break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + Merge = DAG.getUNDEF(Root->getValueType(0)); + break; + } return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), - LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), - RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, - Mask, VL); + LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), + RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), + Merge, Mask, VL); } }; @@ -13221,15 +13282,16 @@ struct CombineResult { static std::optional<CombineResult> canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS, bool AllowSExt, - bool AllowZExt) { + bool AllowZExt, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/false, RHS, - /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/true), @@ -13246,9 +13308,10 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) @@ -13257,8 +13320,9 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { - if (!RHS.areVLAndMaskCompatible(Root)) + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar @@ -13282,9 +13346,10 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. 
static std::optional<CombineResult> canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/false); + /*AllowZExt=*/false, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) @@ -13293,9 +13358,10 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) @@ -13304,10 +13370,13 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!LHS.SupportsSExt || !RHS.SupportsZExt) return std::nullopt; - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); @@ -13317,6 +13386,8 @@ SmallVector<NodeExtensionHelper::CombineToTry> NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { SmallVector<CombineToTry> Strategies; switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: case RISCVISD::ADD_VL: case RISCVISD::SUB_VL: // add|sub -> vwadd(u)|vwsub(u) @@ -13324,6 +13395,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { // add|sub -> vwadd(u)_w|vwsub(u)_w Strategies.push_back(canFoldToVW_W); break; + case ISD::MUL: case RISCVISD::MUL_VL: // mul -> vwmul(u) Strategies.push_back(canFoldToVWWithSameExtension); @@ -13354,12 +13426,14 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { /// mul_vl -> vwmul(u) | vwmul_su /// vwadd_w(u) -> vwadd(u) /// vwub_w(u) -> vwadd(u) -static SDValue -combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { +static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; - assert(NodeExtensionHelper::isSupportedRoot(N) && - "Shouldn't have called this method"); + if (!NodeExtensionHelper::isSupportedRoot(N, DAG)) + return SDValue(); + SmallVector<SDNode *> Worklist; SmallSet<SDNode *, 8> Inserted; Worklist.push_back(N); @@ -13368,11 +13442,11 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { while (!Worklist.empty()) { SDNode *Root = Worklist.pop_back_val(); - if (!NodeExtensionHelper::isSupportedRoot(Root)) + if (!NodeExtensionHelper::isSupportedRoot(Root, DAG)) return SDValue(); - NodeExtensionHelper LHS(N, 0, DAG); - NodeExtensionHelper RHS(N, 1, DAG); + NodeExtensionHelper LHS(N, 0, DAG, Subtarget); + NodeExtensionHelper RHS(N, 1, DAG, Subtarget); auto AppendUsersIfNeeded = [&Worklist, 
&Inserted](const NodeExtensionHelper &Op) { if (Op.needToPromoteOtherUsers()) { @@ -13399,7 +13473,8 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (NodeExtensionHelper::CombineToTry FoldingStrategy : FoldingStrategies) { - std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); + std::optional<CombineResult> Res = + FoldingStrategy(N, LHS, RHS, DAG, Subtarget); if (Res) { Matched = true; CombinesToApply.push_back(*Res); @@ -13428,7 +13503,7 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; ValuesToReplace.reserve(CombinesToApply.size()); for (CombineResult Res : CombinesToApply) { - SDValue NewValue = Res.materialize(DAG); + SDValue NewValue = Res.materialize(DAG, Subtarget); if (!InputRootReplacement) { assert(Res.Root == N && "First element is expected to be the current node"); @@ -14078,7 +14153,7 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, for (SDNode *U : N0->uses()) { if (U->getOpcode() != ISD::SRA || !isa<ConstantSDNode>(U->getOperand(1)) || - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32) + U->getConstantOperandVal(1) > 32) return SDValue(); } @@ -14700,13 +14775,20 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - assert(N->getOpcode() == RISCVISD::ADD_VL); + + assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); + + if (N->getValueType(0).isFixedLengthVector()) + return SDValue(); + SDValue Addend = N->getOperand(0); SDValue MulOp = N->getOperand(1); - SDValue AddMergeOp = N->getOperand(2); - if (!AddMergeOp.isUndef()) - return SDValue(); + if (N->getOpcode() == RISCVISD::ADD_VL) { + SDValue AddMergeOp = N->getOperand(2); + if (!AddMergeOp.isUndef()) + return SDValue(); + } auto IsVWMulOpc = [](unsigned Opc) { switch (Opc) { @@ -14730,8 +14812,16 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, if (!MulMergeOp.isUndef()) return SDValue(); - SDValue AddMask = N->getOperand(3); - SDValue AddVL = N->getOperand(4); + auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (N->getOpcode() == ISD::ADD) { + SDLoc DL(N); + return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, + Subtarget); + } + return std::make_pair(N->getOperand(3), N->getOperand(4)); + }(N, DAG, Subtarget); + SDValue MulMask = MulOp.getOperand(3); SDValue MulVL = MulOp.getOperand(4); @@ -14997,10 +15087,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::AND, DL, VT, NewFMV, DAG.getConstant(~SignBit, DL, VT)); } - case ISD::ADD: + case ISD::ADD: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; + if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) + return V; return performADDCombine(N, DAG, Subtarget); - case ISD::SUB: + } + case ISD::SUB: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performSUBCombine(N, DAG, Subtarget); + } case ISD::AND: return performANDCombine(N, DCI, Subtarget); case ISD::OR: @@ -15008,6 +15106,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return performXORCombine(N, DAG, Subtarget); case ISD::MUL: + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: @@ -15484,7 +15584,7 @@ SDValue 
RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } case RISCVISD::ADD_VL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) return V; return combineToVWMACC(N, DAG, Subtarget); case RISCVISD::SUB_VL: @@ -15493,7 +15593,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: case RISCVISD::MUL_VL: - return combineBinOp_VLToVWBinOp_VL(N, DCI); + return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); case RISCVISD::VFMADD_VL: case RISCVISD::VFNMADD_VL: case RISCVISD::VFMSUB_VL: diff --git a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp index de2227f82192..e487cc8b2e20 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertWriteVXRM.cpp @@ -198,13 +198,23 @@ char RISCVInsertWriteVXRM::ID = 0; INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME, false, false) +static bool ignoresVXRM(const MachineInstr &MI) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { + default: + return false; + case RISCV::VNCLIP_WI: + case RISCV::VNCLIPU_WI: + return MI.getOperand(3).getImm() == 0; + } +} + bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) { BlockData &BBInfo = BlockInfo[MBB.getNumber()]; bool NeedVXRMWrite = false; for (const MachineInstr &MI : MBB) { int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); - if (VXRMIdx >= 0) { + if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); if (!BBInfo.VXRMUse.isValid()) @@ -356,7 +366,7 @@ void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) { for (MachineInstr &MI : MBB) { int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); - if (VXRMIdx >= 0) { + if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); if (PendingInsert || !Info.isStatic() || diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 1dcff7eb563e..cd98438eed88 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2282,9 +2282,14 @@ bool RISCVInstrInfo::shouldClusterMemOps( return false; } - // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets - // indicate they likely share a cache line. - return ClusterSize <= 4; + unsigned CacheLineSize = + BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); + // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. + CacheLineSize = CacheLineSize ? CacheLineSize : 64; + // Cluster if the memory operations are on the same or a neighbouring cache + // line, but limit the maximum ClusterSize to avoid creating too much + // additional register pressure. 
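The return on the next line replaces the bare ClusterSize <= 4 test: loads and stores are now clustered only when their offsets fall within one cache line of each other, assuming 64 bytes whenever the subtarget leaves getCacheLineSize() unset. A reduced, self-contained form of the new test (mayCluster is an illustrative name; the real code queries RISCVSubtarget):

#include <cstdint>
#include <cstdlib>

static bool mayCluster(int64_t Offset1, int64_t Offset2, unsigned ClusterSize,
                       unsigned CacheLineSize /* 0 when unknown */) {
  int64_t CL = CacheLineSize ? CacheLineSize : 64; // documented fallback
  return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CL;
}
// E.g. with the 64-byte default, offsets 0 and 48 may cluster; 0 and 80 may not.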
+ return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; } // Set BaseReg (the base register operand), Offset (the byte offset being diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index edc08187d8f7..35e8edf5d2fa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -2111,13 +2111,16 @@ include "RISCVInstrInfoZk.td" include "RISCVInstrInfoV.td" include "RISCVInstrInfoZvk.td" -// Integer -include "RISCVInstrInfoZicbo.td" -include "RISCVInstrInfoZicond.td" - // Compressed include "RISCVInstrInfoC.td" include "RISCVInstrInfoZc.td" +include "RISCVInstrInfoZcmop.td" + +// Integer +include "RISCVInstrInfoZimop.td" +include "RISCVInstrInfoZicbo.td" +include "RISCVInstrInfoZicond.td" +include "RISCVInstrInfoZicfiss.td" //===----------------------------------------------------------------------===// // Vendor extensions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 488ffa73f4e4..30deeaa06448 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -257,13 +257,13 @@ class SegRegClass<LMULInfo m, int nf> { // Vector register and vector group type information. //===----------------------------------------------------------------------===// -class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M, +class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, LMULInfo M, ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> { ValueType Vector = Vec; ValueType Mask = Mas; int SEW = Sew; int Log2SEW = !logtwo(Sew); - VReg RegClass = Reg; + VReg RegClass = M.vrclass; LMULInfo LMul = M; ValueType Scalar = Scal; RegisterClass ScalarRegClass = ScalarReg; @@ -279,9 +279,9 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M, } class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew, - VReg Reg, LMULInfo M, ValueType Scal = XLenVT, + LMULInfo M, ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> - : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg> { + : VTypeInfo<Vec, Mas, Sew, M, Scal, ScalarReg> { ValueType VectorM1 = VecM1; } @@ -289,70 +289,70 @@ defset list<VTypeInfo> AllVectors = { defset list<VTypeInfo> AllIntegerVectors = { defset list<VTypeInfo> NoGroupIntegerVectors = { defset list<VTypeInfo> FractionalGroupIntegerVectors = { - def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, VR, V_MF8>; - def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, VR, V_MF4>; - def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, VR, V_MF2>; - def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, VR, V_MF4>; - def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, VR, V_MF2>; - def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, VR, V_MF2>; + def VI8MF8: VTypeInfo<vint8mf8_t, vbool64_t, 8, V_MF8>; + def VI8MF4: VTypeInfo<vint8mf4_t, vbool32_t, 8, V_MF4>; + def VI8MF2: VTypeInfo<vint8mf2_t, vbool16_t, 8, V_MF2>; + def VI16MF4: VTypeInfo<vint16mf4_t, vbool64_t, 16, V_MF4>; + def VI16MF2: VTypeInfo<vint16mf2_t, vbool32_t, 16, V_MF2>; + def VI32MF2: VTypeInfo<vint32mf2_t, vbool64_t, 32, V_MF2>; } - def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, VR, V_M1>; - def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, VR, V_M1>; - def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, VR, V_M1>; - def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, VR, V_M1>; + def VI8M1: VTypeInfo<vint8m1_t, vbool8_t, 8, V_M1>; + def VI16M1: VTypeInfo<vint16m1_t, vbool16_t, 16, 
V_M1>; + def VI32M1: VTypeInfo<vint32m1_t, vbool32_t, 32, V_M1>; + def VI64M1: VTypeInfo<vint64m1_t, vbool64_t, 64, V_M1>; } defset list<GroupVTypeInfo> GroupIntegerVectors = { - def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, VRM2, V_M2>; - def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, VRM4, V_M4>; - def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, VRM8, V_M8>; + def VI8M2: GroupVTypeInfo<vint8m2_t, vint8m1_t, vbool4_t, 8, V_M2>; + def VI8M4: GroupVTypeInfo<vint8m4_t, vint8m1_t, vbool2_t, 8, V_M4>; + def VI8M8: GroupVTypeInfo<vint8m8_t, vint8m1_t, vbool1_t, 8, V_M8>; - def VI16M2: GroupVTypeInfo<vint16m2_t,vint16m1_t,vbool8_t, 16,VRM2, V_M2>; - def VI16M4: GroupVTypeInfo<vint16m4_t,vint16m1_t,vbool4_t, 16,VRM4, V_M4>; - def VI16M8: GroupVTypeInfo<vint16m8_t,vint16m1_t,vbool2_t, 16,VRM8, V_M8>; + def VI16M2: GroupVTypeInfo<vint16m2_t, vint16m1_t, vbool8_t, 16, V_M2>; + def VI16M4: GroupVTypeInfo<vint16m4_t, vint16m1_t, vbool4_t, 16, V_M4>; + def VI16M8: GroupVTypeInfo<vint16m8_t, vint16m1_t, vbool2_t, 16, V_M8>; - def VI32M2: GroupVTypeInfo<vint32m2_t,vint32m1_t,vbool16_t,32,VRM2, V_M2>; - def VI32M4: GroupVTypeInfo<vint32m4_t,vint32m1_t,vbool8_t, 32,VRM4, V_M4>; - def VI32M8: GroupVTypeInfo<vint32m8_t,vint32m1_t,vbool4_t, 32,VRM8, V_M8>; + def VI32M2: GroupVTypeInfo<vint32m2_t, vint32m1_t, vbool16_t, 32, V_M2>; + def VI32M4: GroupVTypeInfo<vint32m4_t, vint32m1_t, vbool8_t, 32, V_M4>; + def VI32M8: GroupVTypeInfo<vint32m8_t, vint32m1_t, vbool4_t, 32, V_M8>; - def VI64M2: GroupVTypeInfo<vint64m2_t,vint64m1_t,vbool32_t,64,VRM2, V_M2>; - def VI64M4: GroupVTypeInfo<vint64m4_t,vint64m1_t,vbool16_t,64,VRM4, V_M4>; - def VI64M8: GroupVTypeInfo<vint64m8_t,vint64m1_t,vbool8_t, 64,VRM8, V_M8>; + def VI64M2: GroupVTypeInfo<vint64m2_t, vint64m1_t, vbool32_t, 64, V_M2>; + def VI64M4: GroupVTypeInfo<vint64m4_t, vint64m1_t, vbool16_t, 64, V_M4>; + def VI64M8: GroupVTypeInfo<vint64m8_t, vint64m1_t, vbool8_t, 64, V_M8>; } } defset list<VTypeInfo> AllFloatVectors = { defset list<VTypeInfo> NoGroupFloatVectors = { defset list<VTypeInfo> FractionalGroupFloatVectors = { - def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, VR, V_MF4, f16, FPR16>; - def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, VR, V_MF2, f16, FPR16>; - def VF32MF2: VTypeInfo<vfloat32mf2_t,vbool64_t, 32, VR, V_MF2, f32, FPR32>; + def VF16MF4: VTypeInfo<vfloat16mf4_t, vbool64_t, 16, V_MF4, f16, FPR16>; + def VF16MF2: VTypeInfo<vfloat16mf2_t, vbool32_t, 16, V_MF2, f16, FPR16>; + def VF32MF2: VTypeInfo<vfloat32mf2_t, vbool64_t, 32, V_MF2, f32, FPR32>; } - def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, VR, V_M1, f16, FPR16>; - def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, VR, V_M1, f32, FPR32>; - def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, VR, V_M1, f64, FPR64>; + def VF16M1: VTypeInfo<vfloat16m1_t, vbool16_t, 16, V_M1, f16, FPR16>; + def VF32M1: VTypeInfo<vfloat32m1_t, vbool32_t, 32, V_M1, f32, FPR32>; + def VF64M1: VTypeInfo<vfloat64m1_t, vbool64_t, 64, V_M1, f64, FPR64>; } defset list<GroupVTypeInfo> GroupFloatVectors = { def VF16M2: GroupVTypeInfo<vfloat16m2_t, vfloat16m1_t, vbool8_t, 16, - VRM2, V_M2, f16, FPR16>; + V_M2, f16, FPR16>; def VF16M4: GroupVTypeInfo<vfloat16m4_t, vfloat16m1_t, vbool4_t, 16, - VRM4, V_M4, f16, FPR16>; + V_M4, f16, FPR16>; def VF16M8: GroupVTypeInfo<vfloat16m8_t, vfloat16m1_t, vbool2_t, 16, - VRM8, V_M8, f16, FPR16>; + V_M8, f16, FPR16>; def VF32M2: GroupVTypeInfo<vfloat32m2_t, vfloat32m1_t, vbool16_t, 32, - VRM2, V_M2, f32, FPR32>; + V_M2, f32, FPR32>; 
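Every VTypeInfo/GroupVTypeInfo update in this hunk is the same change: the explicit VReg argument was redundant, because the register class (VR, VRM2, VRM4, VRM8) is fully determined by the LMUL, so RegClass is now derived from M.vrclass instead of being passed alongside it. A C++ analogy of dropping a parameter that must always agree with another one (a sketch of the design point, not the TableGen itself):

// Before: callers passed an LMULInfo plus a register class and had to keep
// the two consistent by hand. After: the class is derived, so it cannot
// drift out of sync with the LMUL.
struct LMULInfo {
  const char *vrclass; // "VR", "VRM2", "VRM4", or "VRM8"
};
struct VTypeInfo {
  LMULInfo M;
  const char *regClass() const { return M.vrclass; } // derived from M
};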
def VF32M4: GroupVTypeInfo<vfloat32m4_t, vfloat32m1_t, vbool8_t, 32, - VRM4, V_M4, f32, FPR32>; + V_M4, f32, FPR32>; def VF32M8: GroupVTypeInfo<vfloat32m8_t, vfloat32m1_t, vbool4_t, 32, - VRM8, V_M8, f32, FPR32>; + V_M8, f32, FPR32>; def VF64M2: GroupVTypeInfo<vfloat64m2_t, vfloat64m1_t, vbool32_t, 64, - VRM2, V_M2, f64, FPR64>; + V_M2, f64, FPR64>; def VF64M4: GroupVTypeInfo<vfloat64m4_t, vfloat64m1_t, vbool16_t, 64, - VRM4, V_M4, f64, FPR64>; + V_M4, f64, FPR64>; def VF64M8: GroupVTypeInfo<vfloat64m8_t, vfloat64m1_t, vbool8_t, 64, - VRM8, V_M8, f64, FPR64>; + V_M8, f64, FPR64>; } } } @@ -360,19 +360,19 @@ defset list<VTypeInfo> AllVectors = { defset list<VTypeInfo> AllBFloatVectors = { defset list<VTypeInfo> NoGroupBFloatVectors = { defset list<VTypeInfo> FractionalGroupBFloatVectors = { - def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, VR, V_MF4, bf16, FPR16>; - def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, VR, V_MF2, bf16, FPR16>; + def VBF16MF4: VTypeInfo<vbfloat16mf4_t, vbool64_t, 16, V_MF4, bf16, FPR16>; + def VBF16MF2: VTypeInfo<vbfloat16mf2_t, vbool32_t, 16, V_MF2, bf16, FPR16>; } - def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, VR, V_M1, bf16, FPR16>; + def VBF16M1: VTypeInfo<vbfloat16m1_t, vbool16_t, 16, V_M1, bf16, FPR16>; } defset list<GroupVTypeInfo> GroupBFloatVectors = { def VBF16M2: GroupVTypeInfo<vbfloat16m2_t, vbfloat16m1_t, vbool8_t, 16, - VRM2, V_M2, bf16, FPR16>; + V_M2, bf16, FPR16>; def VBF16M4: GroupVTypeInfo<vbfloat16m4_t, vbfloat16m1_t, vbool4_t, 16, - VRM4, V_M4, bf16, FPR16>; + V_M4, bf16, FPR16>; def VBF16M8: GroupVTypeInfo<vbfloat16m8_t, vbfloat16m1_t, vbool2_t, 16, - VRM8, V_M8, bf16, FPR16>; + V_M8, bf16, FPR16>; } } @@ -1069,7 +1069,8 @@ class VPseudoUnaryMask<VReg RetClass, class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, - string Constraint = ""> : + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$rm, @@ -1079,6 +1080,7 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1106,7 +1108,8 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass, class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, - string Constraint = ""> : + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, @@ -1115,6 +1118,7 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 1; @@ -1123,7 +1127,8 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass, class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, - string Constraint = ""> : + string Constraint = "", + int TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd), (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$frm, @@ -1133,6 +1138,7 @@ class VPseudoUnaryMask_FRM<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp 
= 1; let HasVecPolicyOp = 1; @@ -1528,7 +1534,8 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass, DAGOperand Op2Class, LMULInfo MInfo, bit CarryIn, - string Constraint> : + string Constraint, + int TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), !if(CarryIn, (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, @@ -1540,6 +1547,7 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass, let mayStore = 0; let hasSideEffects = 0; let Constraints = !interleave([Constraint, "$rd = $merge"], ","); + let TargetOverlapConstraintType = TargetConstraintType; let HasVLOp = 1; let HasSEWOp = 1; let HasVecPolicyOp = 0; @@ -2447,10 +2455,11 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, m.vrclass, m.vrclass, m, CarryIn, Constraint, TargetConstraintType>; } -multiclass VPseudoTiedBinaryV_VM<LMULInfo m> { +multiclass VPseudoTiedBinaryV_VM<LMULInfo m, int TargetConstraintType = 1> { def "_VVM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, - m.vrclass, m.vrclass, m, 1, "">; + m.vrclass, m.vrclass, m, 1, "", + TargetConstraintType>; } multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, @@ -2462,10 +2471,11 @@ multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1, m.vrclass, GPR, m, CarryIn, Constraint, TargetConstraintType>; } -multiclass VPseudoTiedBinaryV_XM<LMULInfo m> { +multiclass VPseudoTiedBinaryV_XM<LMULInfo m, int TargetConstraintType = 1> { def "_VXM" # "_" # m.MX: VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, - m.vrclass, GPR, m, 1, "">; + m.vrclass, GPR, m, 1, "", + TargetConstraintType>; } multiclass VPseudoVMRG_FM { @@ -2596,45 +2606,48 @@ multiclass VPseudoVRCP_V_RM { } } -multiclass PseudoVEXT_VF2<int TargetConstraintType = 1> { +multiclass PseudoVEXT_VF2 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF2 in { defvar mx = m.MX; + defvar CurrTypeConstraints = !if(!or(!eq(mx, "MF4"), !eq(mx, "MF2"), !eq(mx, "M1")), 1, 3); let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>, + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints, CurrTypeConstraints>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, TargetConstraintType>, + VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints, CurrTypeConstraints>, RISCVMaskedPseudo<MaskIdx=2>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } } } -multiclass PseudoVEXT_VF4<int TargetConstraintType = 1> { +multiclass PseudoVEXT_VF4 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF4 in { defvar mx = m.MX; + defvar CurrTypeConstraints = !if(!or(!eq(mx, "MF2"), !eq(mx, "M1"), !eq(mx, "M2")), 1, 3); let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>, + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints, CurrTypeConstraints>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, TargetConstraintType>, + VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints, CurrTypeConstraints>, RISCVMaskedPseudo<MaskIdx=2>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } } } -multiclass PseudoVEXT_VF8<int TargetConstraintType = 1> { +multiclass PseudoVEXT_VF8 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF8 in { defvar mx = m.MX; + defvar 
CurrTypeConstraints = !if(!or(!eq(mx, "M1"), !eq(mx, "M2"), !eq(mx, "M4")), 1, 3); let VLMul = m.value in { - def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>, + def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints, CurrTypeConstraints>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; def "_" # mx # "_MASK" : - VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, TargetConstraintType>, + VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints, CurrTypeConstraints>, RISCVMaskedPseudo<MaskIdx=2>, SchedUnary<"WriteVExtV", "ReadVExtV", mx, forceMergeOpRead=true>; } @@ -3619,7 +3632,7 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass, let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class, - Constraint>, + Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3628,12 +3641,13 @@ multiclass VPseudoConversionRoundingMode<VReg RetClass, multiclass VPseudoConversionRM<VReg RetClass, VReg Op1Class, LMULInfo MInfo, - string Constraint = ""> { + string Constraint = "", + int TargetConstraintType = 1> { let VLMul = MInfo.value in { def "_" # MInfo.MX : VPseudoUnaryNoMask_FRM<RetClass, Op1Class, - Constraint>; + Constraint, TargetConstraintType>; def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class, - Constraint>, + Constraint, TargetConstraintType>, RISCVMaskedPseudo<MaskIdx=2>; } } @@ -3761,7 +3775,7 @@ multiclass VPseudoVNCVTI_W_RM { multiclass VPseudoVNCVTI_RM_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { - defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint>, + defm _W : VPseudoConversionRM<m.vrclass, m.wvrclass, m, constraint, TargetConstraintType=2>, SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX, forceMergeOpRead=true>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 33bdc3366aa3..5b50a4a78c01 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2338,6 +2338,64 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">; defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">; defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">; +// 12.5. 
Vector Narrowing Fixed-Point Clip Instructions +class VPatTruncSatClipMaxMinBase<string inst, + VTypeInfo vti, + VTypeInfo wti, + SDPatternOperator op1, + int op1_value, + SDPatternOperator op2, + int op2_value> : + Pat<(vti.Vector (riscv_trunc_vector_vl + (wti.Vector (op1 + (wti.Vector (op2 + (wti.Vector wti.RegClass:$rs1), + (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op2_value, (XLenVT srcvalue))), + (wti.Vector undef),(wti.Mask V0), VLOpFrag)), + (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), op1_value, (XLenVT srcvalue))), + (wti.Vector undef), (wti.Mask V0), VLOpFrag)), + (vti.Mask V0), VLOpFrag)), + (!cast<Instruction>(inst#"_WI_"#vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, + (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; + +class VPatTruncSatClipUMin<VTypeInfo vti, + VTypeInfo wti, + int uminval> : + Pat<(vti.Vector (riscv_trunc_vector_vl + (wti.Vector (riscv_umin_vl + (wti.Vector wti.RegClass:$rs1), + (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), uminval, (XLenVT srcvalue))), + (wti.Vector undef), (wti.Mask V0), VLOpFrag)), + (vti.Mask V0), VLOpFrag)), + (!cast<Instruction>("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK") + (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, + (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; + +multiclass VPatTruncSatClipMaxMin<string inst, VTypeInfo vti, VTypeInfo wti, + SDPatternOperator max, int maxval, SDPatternOperator min, int minval> { + def : VPatTruncSatClipMaxMinBase<inst, vti, wti, max, maxval, min, minval>; + def : VPatTruncSatClipMaxMinBase<inst, vti, wti, min, minval, max, maxval>; +} + +multiclass VPatTruncSatClip<VTypeInfo vti, VTypeInfo wti> { + defvar sew = vti.SEW; + defvar uminval = !sub(!shl(1, sew), 1); + defvar sminval = !sub(!shl(1, !sub(sew, 1)), 1); + defvar smaxval = !sub(0, !shl(1, !sub(sew, 1))); + + let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates, + GetVTypePredicates<wti>.Predicates) in { + defm : VPatTruncSatClipMaxMin<"PseudoVNCLIP", vti, wti, riscv_smin_vl, + sminval, riscv_smax_vl, smaxval>; + def : VPatTruncSatClipUMin<vti, wti, uminval>; + } + +} + +foreach vtiToWti = AllWidenableIntVectors in + defm : VPatTruncSatClip<vtiToWti.Vti, vtiToWti.Wti>; + // 13. Vector Floating-Point Instructions // 13.2. 
Vector Single-Width Floating-Point Add/Subtract Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 0b1d5b664df9..31f832dfd84c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -349,20 +349,26 @@ multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type, : VPseudoTernaryNoMaskWithPolicy<vd_type, V_M1.vrclass, vs2_type, Constraint>; } -multiclass VPseudoSiFiveVQMACC<string Constraint = ""> { +multiclass VPseudoSiFiveVQMACCDOD<string Constraint = ""> { foreach m = MxListVF8 in let VLMul = m.value in defm NAME : VPseudoSiFiveVMACC<m.MX, m.vrclass, m.vrclass, Constraint>; } +multiclass VPseudoSiFiveVQMACCQOQ<string Constraint = ""> { + foreach m = [V_MF2, V_M1, V_M2, V_M4] in + let VLMul = m.value in + defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>; +} + multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> { - foreach m = MxListFW in + foreach m = MxListVF2 in let VLMul = m.value in defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>; } multiclass VPseudoSiFiveVFNRCLIP<string Constraint = "@earlyclobber $rd"> { - foreach i = [0, 1, 2, 3, 4] in + foreach i = 0-4 in let hasSideEffects = 0 in defm "Pseudo" # NAME : VPseudoBinaryRoundingMode<MxListW[i].vrclass, MxListVF4[i].vrclass, @@ -400,17 +406,17 @@ let Predicates = [HasVendorXSfvcp] in { } let Predicates = [HasVendorXSfvqmaccdod] in { - defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACC; - defm VQMACC_2x8x2 : VPseudoSiFiveVQMACC; - defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACC; - defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACC; + defm VQMACCU_2x8x2 : VPseudoSiFiveVQMACCDOD; + defm VQMACC_2x8x2 : VPseudoSiFiveVQMACCDOD; + defm VQMACCUS_2x8x2 : VPseudoSiFiveVQMACCDOD; + defm VQMACCSU_2x8x2 : VPseudoSiFiveVQMACCDOD; } let Predicates = [HasVendorXSfvqmaccqoq] in { - defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACC; - defm VQMACC_4x8x4 : VPseudoSiFiveVQMACC; - defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACC; - defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACC; + defm VQMACCU_4x8x4 : VPseudoSiFiveVQMACCQOQ; + defm VQMACC_4x8x4 : VPseudoSiFiveVQMACCQOQ; + defm VQMACCUS_4x8x4 : VPseudoSiFiveVQMACCQOQ; + defm VQMACCSU_4x8x4 : VPseudoSiFiveVQMACCQOQ; } let Predicates = [HasVendorXSfvfwmaccqqq] in { @@ -566,16 +572,25 @@ multiclass VPatVMACC<string intrinsic, string instruction, string kind, } } -defset list<VTypeInfoToWide> VQMACCInfoPairs = { +defset list<VTypeInfoToWide> VQMACCDODInfoPairs = { def : VTypeInfoToWide<VI8M1, VI32M1>; def : VTypeInfoToWide<VI8M2, VI32M2>; def : VTypeInfoToWide<VI8M4, VI32M4>; def : VTypeInfoToWide<VI8M8, VI32M8>; } -multiclass VPatVQMACC<string intrinsic, string instruction, string kind> - : VPatVMACC<intrinsic, instruction, kind, VQMACCInfoPairs, vint8m1_t>; +defset list<VTypeInfoToWide> VQMACCQOQInfoPairs = { + def : VTypeInfoToWide<VI8MF2, VI32M1>; + def : VTypeInfoToWide<VI8M1, VI32M2>; + def : VTypeInfoToWide<VI8M2, VI32M4>; + def : VTypeInfoToWide<VI8M4, VI32M8>; +} + +multiclass VPatVQMACCDOD<string intrinsic, string instruction, string kind> + : VPatVMACC<intrinsic, instruction, kind, VQMACCDODInfoPairs, vint8m1_t>; +multiclass VPatVQMACCQOQ<string intrinsic, string instruction, string kind> + : VPatVMACC<intrinsic, instruction, kind, VQMACCQOQInfoPairs, vint8m1_t>; multiclass VPatVFWMACC<string intrinsic, string instruction, string kind> : VPatVMACC<intrinsic, instruction, kind, AllWidenableBFloatToFloatVectors, @@ -637,17 +652,17 @@ let Predicates = [HasVendorXSfvcp] in 
{ } let Predicates = [HasVendorXSfvqmaccdod] in { - defm : VPatVQMACC<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">; - defm : VPatVQMACC<"vqmacc_2x8x2", "VQMACC", "2x8x2">; - defm : VPatVQMACC<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">; - defm : VPatVQMACC<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">; + defm : VPatVQMACCDOD<"vqmaccu_2x8x2", "VQMACCU", "2x8x2">; + defm : VPatVQMACCDOD<"vqmacc_2x8x2", "VQMACC", "2x8x2">; + defm : VPatVQMACCDOD<"vqmaccus_2x8x2", "VQMACCUS", "2x8x2">; + defm : VPatVQMACCDOD<"vqmaccsu_2x8x2", "VQMACCSU", "2x8x2">; } let Predicates = [HasVendorXSfvqmaccqoq] in { - defm : VPatVQMACC<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">; - defm : VPatVQMACC<"vqmacc_4x8x4", "VQMACC", "4x8x4">; - defm : VPatVQMACC<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">; - defm : VPatVQMACC<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmaccu_4x8x4", "VQMACCU", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmacc_4x8x4", "VQMACC", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmaccus_4x8x4", "VQMACCUS", "4x8x4">; + defm : VPatVQMACCQOQ<"vqmaccsu_4x8x4", "VQMACCSU", "4x8x4">; } let Predicates = [HasVendorXSfvfwmaccqqq] in { @@ -658,27 +673,3 @@ let Predicates = [HasVendorXSfvfnrclipxfqf] in { defm : VPatVFNRCLIP<"vfnrclip_xu_f_qf", "VFNRCLIP_XU_F_QF">; defm : VPatVFNRCLIP<"vfnrclip_x_f_qf", "VFNRCLIP_X_F_QF">; } - -let Predicates = [HasVendorXSfcie] in { -let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in { -def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">, - Sched<[]> { - let rd = 0; - let imm12 = {0b1111,0b1100,0b0000}; -} - -def SF_CDISCARD_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cdiscard.d.l1","$rs1">, - Sched<[]> { - let rd = 0; - let imm12 = {0b1111,0b1100,0b0010}; -} - -def SF_CEASE : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "cease","">, Sched<[]> { - let rs1 = 0; - let rd = 0; - let imm12 = {0b0011,0b0000,0b0101}; -} -} -def : InstAlias<"cflush.d.l1", (SF_CFLUSH_D_L1 X0)>; -def : InstAlias<"cdiscard.d.l1", (SF_CDISCARD_D_L1 X0)>; -} // Predicates = [HasVendorXScie] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td index a78f36244468..3506204d6c25 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td @@ -56,9 +56,8 @@ def rlist : Operand<OtherVT> { int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) return false; - if (!isUInt<4>(Imm)) return false; // 0~3 Reserved for EABI - return (Imm >= 4) && (Imm <= 15); + return isUInt<4>(Imm) && Imm >= 4; }]; } @@ -70,7 +69,7 @@ def spimm : Operand<OtherVT> { int64_t Imm; if (!MCOp.evaluateAsConstantImm(Imm)) return false; - return isShiftedUInt<5, 4>(Imm); + return isShiftedUInt<2, 4>(Imm); }]; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td new file mode 100644 index 000000000000..6fbfde5ef488 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td @@ -0,0 +1,34 @@ +//===-- RISCVInstrInfoZcmop.td -----------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard Compressed +// May-Be-Operations Extension (Zcmop). 
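The rlist and spimm MCOperandPredicate rewrites in RISCVInstrInfoZc.td above rely on llvm::isUInt and llvm::isShiftedUInt from llvm/Support/MathExtras.h: isUInt<4> already enforces Imm <= 15, so only the Imm >= 4 check (encodings 0-3 are reserved for EABI) survives, and spimm shrinks to a 2-bit field scaled by 16. A minimal self-contained sketch of the assumed helper semantics (the *Sketch names are hypothetical stand-ins, not LLVM's):

// Hypothetical stand-ins for llvm::isUInt / llvm::isShiftedUInt, to make
// the predicate arithmetic concrete. Sketch only.
#include <cstdint>

template <unsigned N> constexpr bool isUIntSketch(uint64_t X) {
  return N >= 64 || X < (uint64_t(1) << N); // X fits in N unsigned bits
}

template <unsigned N, unsigned S>
constexpr bool isShiftedUIntSketch(uint64_t X) {
  // X is an N-bit unsigned value shifted left by S bits.
  return isUIntSketch<N + S>(X) && X % (uint64_t(1) << S) == 0;
}

// rlist after the fix: isUInt<4>(Imm) && Imm >= 4  ==>  4..15 only.
static_assert(isUIntSketch<4>(15) && !isUIntSketch<4>(16), "rlist bound");
// spimm after the fix: isShiftedUInt<2, 4>  ==>  0, 16, 32 or 48.
static_assert(isShiftedUIntSketch<2, 4>(48) && !isShiftedUIntSketch<2, 4>(64),
              "spimm range");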
+// This version is still experimental as the 'Zcmop' extension hasn't been
+// ratified yet. It is based on v0.2 of the specification.
+//
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class CMOPInst<bits<3> imm3, string opcodestr>
+    : RVInst16CI<0b011, 0b01, (outs), (ins), opcodestr, ""> {
+  let Inst{6-2} = 0;
+  let Inst{7} = 1;
+  let Inst{10-8} = imm3;
+  let Inst{12-11} = 0;
+}
+
+// CMOP1 and CMOP5 are used by Zicfiss.
+let Predicates = [HasStdExtZcmop, NoHasStdExtZicfiss] in {
+  def CMOP1 : CMOPInst<0, "cmop.1">, Sched<[]>;
+  def CMOP5 : CMOPInst<2, "cmop.5">, Sched<[]>;
+}
+
+foreach n = [3, 7, 9, 11, 13, 15] in {
+  let Predicates = [HasStdExtZcmop] in
+  def CMOP # n : CMOPInst<!srl(n, 1), "cmop." # n>, Sched<[]>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
new file mode 100644
index 000000000000..49a57f86cccd
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
@@ -0,0 +1,72 @@
+//===------ RISCVInstrInfoZicfiss.td - RISC-V Zicfiss -*- tablegen -*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+class RVC_SSInst<bits<5> rs1val, RegisterClass reg_class, string opcodestr> :
+      RVInst16<(outs), (ins reg_class:$rs1), opcodestr, "$rs1", [], InstFormatOther> {
+  let Inst{15-13} = 0b011;
+  let Inst{12} = 0;
+  let Inst{11-7} = rs1val;
+  let Inst{6-2} = 0b00000;
+  let Inst{1-0} = 0b01;
+  let DecoderMethod = "decodeCSSPushPopchk";
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZicfiss] in {
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def SSPOPCHK : RVInstI<0b100, OPC_SYSTEM, (outs), (ins GPRX1X5:$rs1), "sspopchk",
+                       "$rs1"> {
+  let rd = 0;
+  let imm12 = 0b110011011100;
+} // Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0
+
+let Uses = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+def SSRDP : RVInstI<0b100, OPC_SYSTEM, (outs GPRNoX0:$rd), (ins), "ssrdp", "$rd"> {
+  let imm12 = 0b110011011100;
+  let rs1 = 0b00000;
+}
+} // Uses = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def SSPUSH : RVInstR<0b1100111, 0b100, OPC_SYSTEM, (outs), (ins GPRX1X5:$rs2),
+                     "sspush", "$rs2"> {
+  let rd = 0b00000;
+  let rs1 = 0b00000;
+}
+} // Predicates = [HasStdExtZicfiss]
+
+let Predicates = [HasStdExtZicfiss, HasStdExtZcmop],
+    DecoderNamespace = "Zicfiss" in {
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def C_SSPUSH : RVC_SSInst<0b00001, GPRX1, "c.sspush">;
+
+let Uses = [SSP], Defs = [SSP], hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def C_SSPOPCHK : RVC_SSInst<0b00101, GPRX5, "c.sspopchk">;
+} // Predicates = [HasStdExtZicfiss, HasStdExtZcmop]
+
+let Predicates = [HasStdExtZicfiss] in
+defm SSAMOSWAP_W :
AMO_rr_aq_rl<0b01001, 0b010, "ssamoswap.w">; + +let Predicates = [HasStdExtZicfiss, IsRV64] in +defm SSAMOSWAP_D : AMO_rr_aq_rl<0b01001, 0b011, "ssamoswap.d">; + +//===----------------------------------------------------------------------===/ +// Compress Instruction tablegen backend. +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtZicfiss, HasStdExtZcmop] in { +def : CompressPat<(SSPUSH X1), (C_SSPUSH X1)>; +def : CompressPat<(SSPOPCHK X5), (C_SSPOPCHK X5)>; +} // Predicates = [HasStdExtZicfiss, HasStdExtZcmop] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td new file mode 100644 index 000000000000..1e8c70046c63 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td @@ -0,0 +1,59 @@ +//===-- RISCVInstrInfoZimop.td -----------------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard +// May-Be-Operations Extension (Zimop). +// This version is still experimental as the 'Zimop' extension hasn't been +// ratified yet. It is based on v0.1 of the specification. +// +//===----------------------------------------------------------------------===// + +class RVInstIMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInstIBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31} = imm7{6}; + let Inst{30} = imm5{4}; + let Inst{29-28} = imm7{5-4}; + let Inst{27-26} = imm5{3-2}; + let Inst{25-22} = imm7{3-0}; + let Inst{21-20} = imm5{1-0}; +} + +class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInstRBase<funct3, opcode, outs, ins, opcodestr, argstr> { + let Inst{31} = imm4{3}; + let Inst{30} = imm3{2}; + let Inst{29-28} = imm4{2-1}; + let Inst{27-26} = imm3{1-0}; + let Inst{25} = imm4{0}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, + RISCVOpcode opcode, string opcodestr> + : RVInstIMopr<imm7, imm5, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1), + opcodestr, "$rd, $rs1">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, + RISCVOpcode opcode, string opcodestr> + : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), + opcodestr, "$rd, $rs1, $rs2">; + +foreach i = 0...31 in { + let Predicates = [HasStdExtZimop] in + def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>, + Sched<[]>; +} + +foreach i = 0...7 in { + let Predicates = [HasStdExtZimop] in + def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>, + Sched<[]>; +} diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 6362a3bef6f2..ba8996e710ed 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -174,8 +174,7 @@ def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76", FeatureStdExtF, FeatureStdExtD, FeatureStdExtC, - FeatureStdExtZihintpause, - FeatureVendorXSfcie], + FeatureStdExtZihintpause], [TuneSiFive7]>; def SIFIVE_U54 
: RISCVProcessorModel<"sifive-u54", diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index a3c19115bd31..24f8d600f1ea 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -127,6 +127,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, RISCV::X27); } + // Shadow stack pointer. + markSuperRegs(Reserved, RISCV::SSP); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index c59c9b294d79..840fd149d681 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -137,6 +137,8 @@ def GPR : GPRRegisterClass<(add (sequence "X%u", 10, 17), (sequence "X%u", 0, 4))>; def GPRX0 : GPRRegisterClass<(add X0)>; +def GPRX1 : GPRRegisterClass<(add X1)>; +def GPRX5 : GPRRegisterClass<(add X5)>; def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)>; @@ -165,6 +167,8 @@ def SP : GPRRegisterClass<(add X2)>; def SR07 : GPRRegisterClass<(add (sequence "X%u", 8, 9), (sequence "X%u", 18, 23))>; +def GPRX1X5 : GPRRegisterClass<(add X1, X5)>; + // Floating point registers let RegAltNameIndices = [ABIRegAltName] in { def F0_H : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; @@ -591,3 +595,6 @@ foreach m = LMULList in { // Special registers def FFLAGS : RISCVReg<0, "fflags">; def FRM : RISCVReg<0, "frm">; + +// Shadow Stack register +def SSP : RISCVReg<0, "ssp">; diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 953df7b15e2f..43475e825b46 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -19,9 +19,11 @@ include "llvm/TableGen/SearchableTable.td" class SysReg<string name, bits<12> op> { string Name = name; - // A maximum of one deprecated name is supported right now. It generates a - // diagnostic when the name is used to encourage software to migrate away from - // the name. + // A maximum of one alias is supported right now. + string AltName = name; + // A maximum of one deprecated name is supported right now. Unlike the + // `AltName` alias, a `DeprecatedName` generates a diagnostic when the name is + // used to encourage software to migrate away from the name. string DeprecatedName = ""; bits<12> Encoding = op; // FIXME: add these additional fields when needed. @@ -41,7 +43,7 @@ def SysRegsList : GenericTable { let FilterClass = "SysReg"; // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed. let Fields = [ - "Name", "DeprecatedName", "Encoding", "FeaturesRequired", + "Name", "AltName", "DeprecatedName", "Encoding", "FeaturesRequired", "isRV32Only", ]; @@ -54,32 +56,13 @@ def lookupSysRegByName : SearchIndex { let Key = [ "Name" ]; } -def lookupSysRegByDeprecatedName : SearchIndex { +def lookupSysRegByAltName : SearchIndex { let Table = SysRegsList; - let Key = [ "DeprecatedName" ]; -} - -class SiFiveReg<string name, bits<12> op> : SysReg<name, op>; - -def SiFiveRegsList : GenericTable { - let FilterClass = "SiFiveReg"; - // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed. 
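The RISCVSystemOperands.td hunk above folds the SiFive-specific table into the generic one: SysReg gains an AltName column plus a lookupSysRegByAltName SearchIndex, and the separate SiFiveRegsList table (removed in the lines that follow) becomes unnecessary. The SearchableTables backend emits one C++ lookup function per SearchIndex; a hedged sketch of the generated shape (real declarations come from the generated RISCVGenSearchableTables.inc, and the field types here are assumptions):

#include "llvm/ADT/StringRef.h"

namespace llvm {
namespace RISCVSysReg {
// Shape inferred from the Fields list above; types are illustrative.
struct SysReg {
  const char *Name;
  const char *AltName;        // silent alias, e.g. "mucounteren"
  const char *DeprecatedName; // accepted, but produces a diagnostic
  unsigned Encoding;          // 12-bit CSR address
};
// One lookup per SearchIndex definition.
const SysReg *lookupSysRegByName(StringRef Name);
const SysReg *lookupSysRegByAltName(StringRef AltName);
const SysReg *lookupSysRegByDeprecatedName(StringRef DeprecatedName);
} // namespace RISCVSysReg
} // namespace llvm

A consumer such as the assembler would try the canonical name first, then the alias, then the deprecated name, warning only in the last case.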
- let Fields = [ - "Name", "DeprecatedName", "Encoding", "FeaturesRequired", - "isRV32Only", - ]; - - let PrimaryKey = [ "Encoding" ]; - let PrimaryKeyName = "lookupSiFiveRegByEncoding"; + let Key = [ "AltName" ]; } -def lookupSiFiveRegByName : SearchIndex { - let Table = SiFiveRegsList; - let Key = [ "Name" ]; -} - -def lookupSiFiveRegByDeprecatedName : SearchIndex { - let Table = SiFiveRegsList; +def lookupSysRegByDeprecatedName : SearchIndex { + let Table = SysRegsList; let Key = [ "DeprecatedName" ]; } @@ -309,7 +292,7 @@ foreach i = 3...31 in //===----------------------------------------------------------------------===// // Machine Counter Setup //===----------------------------------------------------------------------===// -let DeprecatedName = "mucounteren" in // Privileged spec v1.9.1 Name +let AltName = "mucounteren" in // Privileged spec v1.9.1 Name def : SysReg<"mcountinhibit", 0x320>; // mhpmevent3-mhpmevent31 at 0x323-0x33F. @@ -323,20 +306,6 @@ foreach i = 3...31 in { } //===----------------------------------------------------------------------===// -// SiFive Custom Machine Mode Registers -//===----------------------------------------------------------------------===// - -let FeaturesRequired = [{ {RISCV::FeatureVendorXSfcie} }] in { -def : SiFiveReg<"mnscratch", 0x350>; -def : SiFiveReg<"mnepc", 0x351>; -def : SiFiveReg<"mncause", 0x352>; -def : SiFiveReg<"mnstatus", 0x353>; -def : SiFiveReg<"mbpm", 0x7C0>; -def : SiFiveReg<"mfd", 0x7C1>; -def : SiFiveReg<"mpd", 0x7C8>; -} - -//===----------------------------------------------------------------------===// // Debug/ Trace Registers (shared with Debug Mode) //===----------------------------------------------------------------------===// def : SysReg<"tselect", 0x7A0>; @@ -353,7 +322,7 @@ def : SysReg<"dpc", 0x7B1>; // "dscratch" is an alternative name for "dscratch0" which appeared in earlier // drafts of the RISC-V debug spec -let DeprecatedName = "dscratch" in +let AltName = "dscratch" in def : SysReg<"dscratch0", 0x7B2>; def : SysReg<"dscratch1", 0x7B3>; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 96ecc771863e..4c955744b37d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -359,7 +359,8 @@ public: const TargetTransformInfo::LSRCost &C2); bool shouldFoldTerminatingConditionAfterLSR() const { - return true; + // FIXME: Enabling this causes miscompiles. + return false; } }; diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 3a34a0bfae46..6c009b9e8dde 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -959,8 +959,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName( // N is the number of elements of the vector. Type *Ty; - if (TypeStr.starts_with("atomic_")) - TypeStr = TypeStr.substr(strlen("atomic_")); + TypeStr.consume_front("atomic_"); if (TypeStr.starts_with("void")) { Ty = Type::getVoidTy(Ctx); @@ -1007,8 +1006,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName( // Handle "typeN*" or "type vector[N]*". 
bool IsPtrToVec = TypeStr.consume_back("*"); - if (TypeStr.starts_with(" vector[")) { - TypeStr = TypeStr.substr(strlen(" vector[")); + if (TypeStr.consume_front(" vector[")) { TypeStr = TypeStr.substr(0, TypeStr.find(']')); } TypeStr.getAsInteger(10, VecElts); diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 4f0801479211..78bdf3ae9a84 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -2050,7 +2050,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, LHS.getOperand(3).getOpcode() == SPISD::CMPFCC_V9))) && isOneConstant(LHS.getOperand(0)) && isNullConstant(LHS.getOperand(1))) { SDValue CMPCC = LHS.getOperand(3); - SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); + SPCC = LHS.getConstantOperandVal(2); LHS = CMPCC.getOperand(0); RHS = CMPCC.getOperand(1); } @@ -3186,7 +3186,7 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) { SDValue SparcTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 559f2ca476d7..045c4c0aac07 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2186,7 +2186,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // the mask of valid CC values if so. static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(1); switch (Id) { case Intrinsic::s390_tbegin: Opcode = SystemZISD::TBEGIN; @@ -2212,7 +2212,7 @@ static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, // CC value as its final argument. Provide the associated SystemZISD // opcode and the mask of valid CC values if so. 
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpkshs: case Intrinsic::s390_vpksfs: @@ -2600,10 +2600,9 @@ static bool shouldSwapCmpOperands(const Comparison &C) { return true; if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) return true; - if (C.ICmpType != SystemZICMP::SignedOnly && - Opcode0 == ISD::AND && + if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && C.Op0.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff) + C.Op0.getConstantOperandVal(1) == 0xffffffff) return true; return false; @@ -3429,11 +3428,9 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { return (Neg.getOpcode() == ISD::SUB && Neg.getOperand(0).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 && - Neg.getOperand(1) == Pos && - (Pos == CmpOp || - (Pos.getOpcode() == ISD::SIGN_EXTEND && - Pos.getOperand(0) == CmpOp))); + Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && + (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && + Pos.getOperand(0) == CmpOp))); } // Return the absolute or negative absolute of Op; IsNegative decides which. @@ -3740,7 +3737,7 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, MFI.setFrameAddressIsTaken(true); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // By definition, the frame address is the address of the back chain. (In @@ -3776,7 +3773,7 @@ SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, return SDValue(); SDLoc DL(Op); - unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Depth = Op.getConstantOperandVal(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { @@ -4226,7 +4223,7 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { if (HighOp.getOpcode() == ISD::AND && HighOp.getOperand(1).getOpcode() == ISD::Constant) { SDValue HighOp0 = HighOp.getOperand(0); - uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue(); + uint64_t Mask = HighOp.getConstantOperandVal(1); if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) HighOp = HighOp0; } @@ -4485,10 +4482,10 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); - SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + AtomicOrdering FenceOrdering = + static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); + SyncScope::ID FenceSSID = + static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. 
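Most of the SystemZ, Sparc, and VE churn in this import is one mechanical cleanup: cast<ConstantSDNode>(X.getOperand(I))->getZExtValue() becomes X.getConstantOperandVal(I). The accessor is the existing SDNode/SDValue helper from llvm/CodeGen/SelectionDAGNodes.h; the free functions below are an illustrative before/after sketch, not code from this commit:

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Before: every call site spelled the cast out by hand.
static uint64_t constOpLonghand(llvm::SDValue Op, unsigned I) {
  return llvm::cast<llvm::ConstantSDNode>(Op.getOperand(I))->getZExtValue();
}

// After: the helper does the same cast (and asserts that the operand
// really is a ConstantSDNode).
static uint64_t constOpShorthand(llvm::SDValue Op, unsigned I) {
  return Op.getConstantOperandVal(I);
}

An APInt flavor, getConstantOperandAPInt, covers the sites where the full-width value matters, e.g. isUnneededShiftMask in the X86ISelDAGToDAG.cpp hunk further down.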
@@ -4773,13 +4770,13 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const { - bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + bool IsData = Op.getConstantOperandVal(4); if (!IsData) // Just preserve the chain. return Op.getOperand(0); SDLoc DL(Op); - bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + bool IsWrite = Op.getConstantOperandVal(2); unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), @@ -4825,7 +4822,7 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); } - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::thread_pointer: return lowerThreadPointer(SDLoc(Op), DAG); @@ -5628,7 +5625,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, Op = Op.getOperand(0); if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op.getOperand(1).getOpcode() == ISD::Constant) { - unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Elem = Op.getConstantOperandVal(1); if (!GS.add(Op.getOperand(0), Elem)) return SDValue(); FoundOne = true; @@ -6727,8 +6724,7 @@ SDValue SystemZTargetLowering::combineLOAD( int Index = 1; if (User->getOpcode() == ISD::SRL && User->getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(User->getOperand(1))->getZExtValue() == 64 && - User->hasOneUse()) { + User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { User = *User->use_begin(); Index = 0; } @@ -6857,7 +6853,7 @@ static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) { std::swap(Op0, Op1); if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || Op1.getOperand(1).getOpcode() != ISD::Constant || - cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue() != 64) + Op1.getConstantOperandVal(1) != 64) return false; Op1 = Op1.getOperand(0); @@ -7149,20 +7145,18 @@ SDValue SystemZTargetLowering::combineFP_ROUND( unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); - if (N->getValueType(0) == MVT::f32 && - Op0.hasOneUse() && + if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v2f64 && Op0.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + Op0.getConstantOperandVal(1) == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { - if (U != Op0.getNode() && - U->hasOneUse() && + if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) { + U->getConstantOperandVal(1) == 1) { SDValue OtherRound = SDValue(*U->use_begin(), 0); if (OtherRound.getOpcode() == N->getOpcode() && OtherRound.getOperand(OpNo) == SDValue(U, 0) && @@ -7215,20 +7209,18 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); - if (N->getValueType(0) == MVT::f64 && - Op0.hasOneUse() && + if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v4f32 && Op0.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) { + Op0.getConstantOperandVal(1) == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { - if (U != Op0.getNode() && - U->hasOneUse() && + if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) { + U->getConstantOperandVal(1) == 2) { SDValue OtherExtend = SDValue(*U->use_begin(), 0); if (OtherExtend.getOpcode() == N->getOpcode() && OtherExtend.getOperand(OpNo) == SDValue(U, 0) && @@ -7605,7 +7597,7 @@ SDValue SystemZTargetLowering::combineINTRINSIC( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Id = N->getConstantOperandVal(1); switch (Id) { // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 // or larger is simply a vector load. @@ -7679,7 +7671,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, APInt SrcDemE; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: @@ -7723,7 +7715,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, SrcDemE = APInt(NumElts, 0); if (!DemandedElts[OpNo - 1]) break; - unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Mask = Op.getConstantOperandVal(3); unsigned MaskBit = ((OpNo - 1) ? 1 : 4); // Demand input element 0 or 1, given by the mask bit value. SrcDemE.setBit((Mask & MaskBit)? 
1 : 0); @@ -7732,7 +7724,7 @@ static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, case Intrinsic::s390_vsldb: { // VECTOR SHIFT LEFT DOUBLE BY BYTE assert(VT == MVT::v16i8 && "Unexpected type."); - unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned FirstIdx = Op.getConstantOperandVal(3); assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); unsigned NumSrc0Els = 16 - FirstIdx; SrcDemE = APInt(NumElts, 0); @@ -7808,7 +7800,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { bool IsLogical = false; - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: @@ -7908,7 +7900,7 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode( return 1; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { - unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned Id = Op.getConstantOperandVal(0); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index af6cf340f8a3..d98bb886c185 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -507,11 +507,11 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs), // Signed and unsigned comparisons. def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ - unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned Type = N->getConstantOperandVal(2); return Type != SystemZICMP::UnsignedOnly; }]>; def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{ - unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned Type = N->getConstantOperandVal(2); return Type != SystemZICMP::SignedOnly; }]>; diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 0267aefd1e91..0e41a2d7aa03 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -1101,10 +1101,10 @@ Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder, SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( - cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); - SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( - cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); + AtomicOrdering FenceOrdering = + static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); + SyncScope::ID FenceSSID = + static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); // VE uses Release consistency, so need a fence instruction if it is a // cross-thread fence. @@ -1766,7 +1766,7 @@ static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG, SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); switch (IntNo) { default: // Don't custom lower most intrinsics. 
return SDValue(); @@ -2937,8 +2937,8 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) { if (User->getOperand(1).getNode() != N && User->getOperand(2).getNode() != N && isa<ConstantSDNode>(User->getOperand(3))) { - VECC::CondCode VECCVal = static_cast<VECC::CondCode>( - cast<ConstantSDNode>(User->getOperand(3))->getZExtValue()); + VECC::CondCode VECCVal = + static_cast<VECC::CondCode>(User->getConstantOperandVal(3)); return isIntVECondCode(VECCVal); } [[fallthrough]]; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index bc5f562d9589..051f6caa8c04 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -108,6 +108,8 @@ class X86AsmParser : public MCTargetAsmParser { // Does this instruction use apx extended register? bool UseApxExtendedReg = false; + // Is this instruction explicitly required not to update flags? + bool ForcedNoFlag = false; private: SMLoc consumeToken() { @@ -2312,8 +2314,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, // Drop the optional '.'. StringRef DotDispStr = Tok.getString(); - if (DotDispStr.starts_with(".")) - DotDispStr = DotDispStr.drop_front(1); + DotDispStr.consume_front("."); StringRef TrailingDot; // .Imm gets lexed as a real. @@ -3126,6 +3127,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ForcedVEXEncoding = VEXEncoding_Default; ForcedDispEncoding = DispEncoding_Default; UseApxExtendedReg = false; + ForcedNoFlag = false; // Parse pseudo prefixes. while (true) { @@ -3150,6 +3152,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, ForcedDispEncoding = DispEncoding_Disp8; else if (Prefix == "disp32") ForcedDispEncoding = DispEncoding_Disp32; + else if (Prefix == "nf") + ForcedNoFlag = true; else return Error(NameLoc, "unknown prefix"); @@ -3997,6 +4001,8 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID)) return Match_Unsupported; + if (ForcedNoFlag != !!(MCID.TSFlags & X86II::EVEX_NF)) + return Match_Unsupported; if (ForcedVEXEncoding == VEXEncoding_EVEX && (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX) diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 59e2008f5632..347dc0d4ed43 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -1169,7 +1169,11 @@ static int getInstructionID(struct InternalInstruction *insn, attrMask |= ATTR_EVEXKZ; if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_EVEXB; - if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) + // nf bit is the MSB of aaa + if (nfFromEVEX4of4(insn->vectorExtensionPrefix[3]) && + insn->opcodeType == MAP4) + attrMask |= ATTR_EVEXNF; + else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_EVEXK; if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_VEXL; diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index decc45091941..4c7b1c094522 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -103,6 +103,7 @@ namespace X86Disassembler { #define bFromEVEX4of4(evex) bitFromOffset4(evex) #define v2FromEVEX4of4(evex) invertedBitFromOffset3(evex) 
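For the disassembler change above: EVEX byte 4 keeps the opmask field aaa in bits [2:0], and map-4 (APX) opcodes carry no masking, so bit 2, the MSB of aaa, is reinterpreted as the NF (no status flags) bit; that is why nfFromEVEX4of4 is tested before falling back to aaaFromEVEX4of4. A small sketch of what the two macros extract (hypothetical helper names, mirroring the bit-offset macros this header already defines):

#include <stdint.h>

// Sketch of the field extraction; evex4 is the fourth EVEX payload byte.
static inline unsigned evexNFBitSketch(uint8_t evex4) {
  return (evex4 >> 2) & 0x1; // bitFromOffset2: MSB of aaa, i.e. EVEX.NF
}
static inline unsigned evexAAAFieldSketch(uint8_t evex4) {
  return evex4 & 0x7;        // threeBitsFromOffset0: the aaa opmask field
}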
#define aaaFromEVEX4of4(evex) threeBitsFromOffset0(evex) +#define nfFromEVEX4of4(evex) bitFromOffset2(evex) // These enums represent Intel registers for use by the decoder. #define REGS_8BIT \ diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index b0fcaef5f4b0..e006dd877360 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -870,7 +870,10 @@ enum : uint64_t { ExplicitVEXPrefix = 2ULL << ExplicitOpPrefixShift, /// For instructions that are promoted to EVEX space for EGPR. ExplicitEVEXPrefix = 3ULL << ExplicitOpPrefixShift, - ExplicitOpPrefixMask = 3ULL << ExplicitOpPrefixShift + ExplicitOpPrefixMask = 3ULL << ExplicitOpPrefixShift, + /// EVEX_NF - Set if this instruction has EVEX.NF field set. + EVEX_NFShift = ExplicitOpPrefixShift + 2, + EVEX_NF = 1ULL << EVEX_NFShift }; /// \returns true if the instruction with given opcode is a prefix. @@ -992,6 +995,12 @@ inline unsigned getOperandBias(const MCInstrDesc &Desc) { } } +/// \returns true if the instruction has a NDD (new data destination). +inline bool hasNewDataDest(uint64_t TSFlags) { + return (TSFlags & X86II::OpMapMask) == X86II::T_MAP4 && + (TSFlags & X86II::EVEX_B) && (TSFlags & X86II::VEX_4V); +} + /// \returns operand # for the first field of the memory operand or -1 if no /// memory operands. /// NOTE: This ignores tied operands. If there is a tied register which is @@ -1018,7 +1027,7 @@ inline int getMemoryOperandNo(uint64_t TSFlags) { return -1; case X86II::MRMDestMem: case X86II::MRMDestMemFSIB: - return 0; + return hasNewDataDest(TSFlags); case X86II::MRMSrcMem: case X86II::MRMSrcMemFSIB: // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index cab2f0a2e1c1..1947313a9dfb 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -369,6 +369,9 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O, else if (Flags & X86::IP_HAS_REPEAT) O << "\trep\t"; + if (TSFlags & X86II::EVEX_NF) + O << "\t{nf}"; + // These all require a pseudo prefix if ((Flags & X86::IP_USE_VEX) || (TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 9e1f1eb97e70..924956295e7c 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -251,6 +251,7 @@ public: void setAAA(const MCInst &MI, unsigned OpNum) { EVEX_aaa = getRegEncoding(MI, OpNum); } + void setNF(bool V) { EVEX_aaa |= V << 2; } X86OpcodePrefixHelper(const MCRegisterInfo &MRI) : W(0), R(0), X(0), B(0), M(0), R2(0), X2(0), B2(0), VEX_4V(0), VEX_L(0), @@ -987,9 +988,11 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, } Prefix.setW(TSFlags & X86II::REX_W); + Prefix.setNF(TSFlags & X86II::EVEX_NF); bool HasEVEX_K = TSFlags & X86II::EVEX_K; bool HasVEX_4V = TSFlags & X86II::VEX_4V; + bool IsND = X86II::hasNewDataDest(TSFlags); // IsND implies HasVEX_4V bool HasEVEX_RC = TSFlags & X86II::EVEX_RC; switch (TSFlags & X86II::OpMapMask) { @@ -1049,6 +1052,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, bool EncodeRC = false; uint8_t EVEX_rc = 0; + unsigned CurOp = 
X86II::getOperandBias(Desc); switch (TSFlags & X86II::FormMask) { @@ -1073,16 +1077,21 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // MemAddr, src1(VEX_4V), src2(ModR/M) // MemAddr, src1(ModR/M), imm8 // + // NDD: + // dst(VEX_4V), MemAddr, src1(ModR/M) Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg); Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg); Prefix.setV2(MI, MemOperand + X86::AddrIndexReg, HasVEX_4V); + if (IsND) + Prefix.set4VV2(MI, CurOp++); + CurOp += X86::AddrNumOperands; if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setRR2(MI, CurOp++); @@ -1098,12 +1107,18 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // // FMA4: // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(Imm[7:4]) + // + // NDD: + // dst(VEX_4V), src1(ModR/M), MemAddr + if (IsND) + Prefix.set4VV2(MI, CurOp++); + Prefix.setRR2(MI, CurOp++); if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg); @@ -1160,12 +1175,17 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // // FMA4: // dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M), + // + // NDD: + // dst(VEX_4V), src1(ModR/M.reg), src2(ModR/M) + if (IsND) + Prefix.set4VV2(MI, CurOp++); Prefix.setRR2(MI, CurOp++); if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setBB2(MI, CurOp); @@ -1209,6 +1229,11 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + // + // NDD: + // dst(VEX_4V), src1(ModR/M), src2(ModR/M) + if (IsND) + Prefix.set4VV2(MI, CurOp++); Prefix.setBB2(MI, CurOp); Prefix.setX(MI, CurOp, 4); ++CurOp; @@ -1216,7 +1241,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI, if (HasEVEX_K) Prefix.setAAA(MI, CurOp++); - if (HasVEX_4V) + if (!IsND && HasVEX_4V) Prefix.set4VV2(MI, CurOp++); Prefix.setRR2(MI, CurOp++); @@ -1508,6 +1533,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, unsigned OpcodeOffset = 0; + bool IsND = X86II::hasNewDataDest(TSFlags); + uint64_t Form = TSFlags & X86II::FormMask; switch (Form) { default: @@ -1576,6 +1603,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) ++SrcRegNum; + if (IsND) // Skip the NDD operand encoded in EVEX_VVVV + ++CurOp; emitRegModRMByte(MI.getOperand(CurOp), getX86RegNum(MI.getOperand(SrcRegNum)), CB); @@ -1602,6 +1631,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) ++SrcRegNum; + if (IsND) // Skip new data destination + ++CurOp; + bool ForceSIB = (Form == X86II::MRMDestMemFSIB); emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, Kind, StartByte, CB, Fixups, STI, ForceSIB); @@ -1669,6 +1701,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, case X86II::MRMSrcMem: { unsigned FirstMemOp = CurOp + 1; + if (IsND) // Skip new data destination + CurOp++; + if (HasEVEX_K) // Skip writemask ++FirstMemOp; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 5fd6828f4312..e89ddcc570c9 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1256,11 +1256,6 @@ def ProcessorFeatures { 
list<SubtargetFeature> SRFFeatures = !listconcat(ADLFeatures, SRFAdditionalFeatures); - // Grandridge - list<SubtargetFeature> GRRAdditionalFeatures = [FeatureRAOINT]; - list<SubtargetFeature> GRRFeatures = - !listconcat(SRFFeatures, GRRAdditionalFeatures); - // Arrowlake S list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16, FeatureSHA512, @@ -1706,10 +1701,10 @@ foreach P = ["goldmont_plus", "goldmont-plus"] in { } def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures, ProcessorFeatures.TRMTuning>; -def : ProcModel<"sierraforest", AlderlakePModel, ProcessorFeatures.SRFFeatures, - ProcessorFeatures.TRMTuning>; -def : ProcModel<"grandridge", AlderlakePModel, ProcessorFeatures.GRRFeatures, +foreach P = ["sierraforest", "grandridge"] in { + def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures, ProcessorFeatures.TRMTuning>; +} // "Arrandale" along with corei3 and corei5 foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in { diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 77a997588c4f..73b10cf3067e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -487,7 +487,7 @@ namespace { // from PatFrags in tablegen. bool isUnneededShiftMask(SDNode *N, unsigned Width) const { assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); - const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + const APInt &Val = N->getConstantOperandAPInt(1); if (Val.countr_one() >= Width) return true; @@ -5233,7 +5233,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { break; case X86ISD::VPTERNLOG: { - uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue(); + uint8_t Imm = Node->getConstantOperandVal(3); if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), Imm)) return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 63bdf24d6b4f..1e4b1361f98a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2267,6 +2267,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom); addLegalFPImmediate(APFloat::getZero(APFloat::BFloat())); @@ -2282,6 +2284,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32bf16, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32bf16, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { @@ -3737,9 +3741,11 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, // type. This ensures they get CSE'd. But if the integer type is not // available, use a floating-point +0.0 instead. 
SDValue Vec; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!Subtarget.hasSSE2() && VT.is128BitVector()) { Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); - } else if (VT.isFloatingPoint()) { + } else if (VT.isFloatingPoint() && + TLI.isTypeLegal(VT.getVectorElementType())) { Vec = DAG.getConstantFP(+0.0, dl, VT); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && @@ -31752,7 +31758,7 @@ static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) { static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + unsigned IsData = Op.getConstantOperandVal(4); // We don't support non-data prefetch without PREFETCHI. // Just preserve the chain. diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp index 6c23928228d2..9aa70dff5f93 100644 --- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -135,8 +135,7 @@ bool X86InsertPrefetch::findPrefetchInfo(const FunctionSamples *TopSamples, int64_t D = static_cast<int64_t>(S_V.second); unsigned IID = 0; for (const auto &HintType : HintTypes) { - if (Name.starts_with(HintType.first)) { - Name = Name.drop_front(HintType.first.size()); + if (Name.consume_front(HintType.first)) { IID = HintType.second; break; } diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td index 7f3e193d9a1b..c47bee070e04 100644 --- a/llvm/lib/Target/X86/X86InstrAMX.td +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -14,35 +14,45 @@ //===----------------------------------------------------------------------===// // AMX instructions -let Predicates = [HasAMXTILE, In64BitMode] in { - let SchedRW = [WriteSystem] in { - let hasSideEffects = 1, - Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in - def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), - "ldtilecfg\t$src", - [(int_x86_ldtilecfg addr:$src)]>, VEX, T8; - let hasSideEffects = 1 in - def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), - "sttilecfg\t$src", - [(int_x86_sttilecfg addr:$src)]>, VEX, T8, PD; - let mayLoad = 1 in - def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), - (ins sibmem:$src), - "tileloadd\t{$src, $dst|$dst, $src}", []>, - VEX, T8, XD; - let mayLoad = 1 in - def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), - (ins sibmem:$src), - "tileloaddt1\t{$src, $dst|$dst, $src}", []>, - VEX, T8, PD; +multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> { +let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in { + let hasSideEffects = 1, + Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in + def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src), + "ldtilecfg\t$src", + [(int_x86_ldtilecfg addr:$src)]>, + T8, PS; + let hasSideEffects = 1 in + def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src), + "sttilecfg\t$src", + [(int_x86_sttilecfg addr:$src)]>, + T8, PD; + let mayLoad = 1 in + def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), + (ins sibmem:$src), + "tileloadd\t{$src, $dst|$dst, $src}", []>, + T8, XD; + let mayLoad = 1 in + def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), + (ins sibmem:$src), + "tileloaddt1\t{$src, $dst|$dst, $src}", []>, + T8, PD; + let mayStore = 1 in + def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs), + (ins sibmem:$dst, TILE:$src), + "tilestored\t{$src, $dst|$dst, $src}", []>, + T8, XS; +} 
+} + +let SchedRW = [WriteSystem] in { + defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX; + defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8; + + let Predicates = [HasAMXTILE, In64BitMode] in { let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in def TILERELEASE : I<0x49, MRM_C0, (outs), (ins), - "tilerelease", [(int_x86_tilerelease)]>, VEX, T8; - let mayStore = 1 in - def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs), - (ins sibmem:$dst, TILE:$src), - "tilestored\t{$src, $dst|$dst, $src}", []>, - VEX, T8, XS; + "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS; def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins), "tilezero\t$dst", []>, VEX, T8, XD; @@ -82,8 +92,8 @@ let Predicates = [HasAMXTILE, In64BitMode] in { def PTILEZERO : PseudoI<(outs), (ins u8imm:$src), [(int_x86_tilezero timm:$src)]>; } - } // SchedRW -} // HasAMXTILE + } // Predicates +} // SchedRW let Predicates = [HasAMXINT8, In64BitMode] in { let SchedRW = [WriteSystem] in { diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 7c3c1d5fe42b..c3a673f97d34 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1447,6 +1447,17 @@ def : Pat<(vselect_mask VK8WM:$mask, (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; } +let Predicates = [HasBF16] in { + def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)), + (VBROADCASTF64X4rm addr:$src)>; + def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF32X4rm addr:$src)>; +} + +let Predicates = [HasBF16, HasVLX] in + def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF32X4Z256rm addr:$src)>; + let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 936db48bb9df..6b0c1b8c28c9 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -44,591 +44,298 @@ def PLEA32r : PseudoI<(outs GR32:$dst), (ins anymem:$src), []>; def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>; } -// BinOpRR - Instructions that read "reg, reg". -class BinOpRR<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p> - : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m, - binop_args, p>, Sched<[WriteALU]>; -// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only. -class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRR<o, m, t, (outs), - [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>, - DefEFLAGS; -// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F -class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t> - : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly { - let Form = MRMSrcReg; -} -// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS. -class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRR<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS; -// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF. -class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t> - : BinOpRR_RF<o, m, t, null_frag>, DisassembleOnly { - let Form = MRMSrcReg; -} -// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write -// EFLAGS. 
-class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRR<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.RegClass:$src2, - EFLAGS))]>, DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC]; -} -// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF -class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t> - : BinOpRRF_RF<o, m, t, null_frag>, DisassembleOnly { - let Form = MRMSrcReg; -} - -// BinOpRM - Instructions that read "reg, [mem]". -class BinOpRM<bits<8> o, string m, X86TypeInfo t, dag out, list<dag> p> - : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m, - binop_args, p>, - Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> { - let mayLoad = 1; -} -// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only. -class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node> - : BinOpRM<o, m, t, (outs), - [(set EFLAGS, (node t.RegClass:$src1, - (t.LoadNode addr:$src2)))]>, DefEFLAGS; -// BinOpRM_RF - Instructions that read "reg, reg", and write "reg", EFLAGS. -class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRM<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, - (t.LoadNode addr:$src2)))]>, DefEFLAGS; -// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write -// EFLAGS. -class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpRM<o, m, t, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>, - DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold, - // base, scale, index, offset, segment. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // implicit register read. - WriteADC.ReadAfterFold]; -} - -// BinOpRI - Instructions that read "reg, imm". -class BinOpRI<bits<8> o, string m, X86TypeInfo t, Format f, dag out, list<dag> p> - : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m, - binop_args, p>, Sched<[WriteALU]> { - let ImmT = t.ImmEncoding; -} -// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only. -class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, - Format f> - : BinOpRI<o, m, t, f, (outs), - [(set EFLAGS, (node t.RegClass:$src1, - t.ImmOperator:$src2))]>, DefEFLAGS; -// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS. -class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpRI<o, m, t, f, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS; -// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write -// EFLAGS. -class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpRI<o, m, t, f, (outs t.RegClass:$dst), - [(set t.RegClass:$dst, EFLAGS, - (node t.RegClass:$src1, t.ImmOperator:$src2, - EFLAGS))]>, DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC]; -} -// BinOpRI8 - Instructions that read "reg, imm8". -class BinOpRI8<bits<8> o, string m, X86TypeInfo t, Format f, dag out> - : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, - binop_args, []>, Sched<[WriteALU]> { - let ImmT = Imm8; -} -// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only. 
-class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f> - : BinOpRI8<o, m, t, f, (outs)>, DefEFLAGS; -// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS. -class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f> - : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS; -// BinOpRI8F_RF - Instructions that read "reg, imm", write "reg" and read/write -// EFLAGS. -class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f> - : BinOpRI8<o, m, t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS { - let SchedRW = [WriteADC]; -} - -// BinOpMR - Instructions that read "[mem], reg". -class BinOpMR<bits<8> o, string m, X86TypeInfo t, list<dag> p> - : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2), m, - binop_args, p> { - let mayLoad = 1; -} -// BinOpMR_F - Instructions that read "[mem], imm8" and write EFLAGS only. -class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> - : BinOpMR<o, m, t, - [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>, - Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS; -// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS. -class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDNode node> - : BinOpMR<o, m, t, - [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1), - (implicit EFLAGS)]>, - Sched<[WriteALURMW, - // base, scale, index, offset, segment - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - WriteALU.ReadAfterFold]>, // reg - DefEFLAGS { - let mayStore = 1; -} -// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and -// read/write EFLAGS. -class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node> - : BinOpMR<o, m, t, - [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS), - addr:$src1), (implicit EFLAGS)]>, - Sched<[WriteADCRMW, - // base, scale, index, offset, segment - ReadDefault, ReadDefault, ReadDefault, - ReadDefault, ReadDefault, - WriteALU.ReadAfterFold, // reg - WriteALU.ReadAfterFold]>, // EFLAGS - DefEFLAGS, UseEFLAGS { - let mayStore = 1; -} - -// BinOpMI - Instructions that read "[mem], imm". -class BinOpMI<bits<8> o, string m, X86TypeInfo t, Format f, list<dag> p> - : ITy<o, f, t, (outs), (ins t.MemOperand:$src1, t.ImmOperand:$src2), m, - binop_args, p> { - let ImmT = t.ImmEncoding; - let mayLoad = 1; -} -// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only. -class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, - Format f> - : BinOpMI<o, m, t, f, - [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>, - Sched<[WriteALU.Folded]>, DefEFLAGS; -// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS. -class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpMI<o, m, t, f, - [(store (node (t.VT (load addr:$src1)), - t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>, - Sched<[WriteALURMW]>, DefEFLAGS { - let mayStore = 1; -} -// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and -// read/write EFLAGS. -class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f> - : BinOpMI<o, m, t, f, - [(store (node (t.VT (load addr:$src1)), - t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>, - Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { - let mayStore = 1; -} - -// BinOpMI8 - Instructions that read "[mem], imm8". 
-class BinOpMI8<string m, X86TypeInfo t, Format f> - : ITy<0x83, f, t, (outs), (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m, - binop_args, []> { - let ImmT = Imm8; - let mayLoad = 1; -} -// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only. -class BinOpMI8_F<string m, X86TypeInfo t, Format f> - : BinOpMI8<m, t, f>, Sched<[WriteALU.Folded]>, DefEFLAGS; -// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS. -class BinOpMI8_MF<string m, X86TypeInfo t, Format f> - : BinOpMI8<m, t, f>, Sched<[WriteALURMW]>, DefEFLAGS { - let mayStore = 1; -} -// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and -// read/write EFLAGS. -class BinOpMI8F_MF<string m, X86TypeInfo t, Format f> - : BinOpMI8<m, t, f>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { - let mayStore = 1; -} - -// BinOpAI - Instructions that read "a-reg imm" (Accumulator register). -class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args> - : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>, - Sched<[WriteALU]> { - let ImmT = t.ImmEncoding; - let Uses = [areg]; -} -// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only. -class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args> - : BinOpAI<o, m, t, areg, args>, DefEFLAGS; - -// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS. -class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg, - string args> : BinOpAI<o, m, t, areg, args> { - let Defs = [areg, EFLAGS]; -} -// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write -// EFLAGS. -class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg, - string args> : BinOpAI<o, m, t, areg, args> { - let Uses = [areg, EFLAGS]; - let Defs = [areg, EFLAGS]; - let SchedRW = [WriteADC]; +//===----------------------------------------------------------------------===// +// MUL/IMUL and DIV/IDIV Instructions +// +class MulDivOpR<bits<8> o, Format f, string m, X86TypeInfo t, + X86FoldableSchedWrite sched, list<dag> p> + : UnaryOpR<o, f, m, "$src1", t, (outs), p> { + let SchedRW = [sched]; } -// UnaryOpR - Instructions that read "reg" and write "reg". -class UnaryOpR<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p> - : ITy<o, f, t, (outs t.RegClass:$dst), - (ins t.RegClass:$src1), m, "$dst", p>, Sched<[WriteALU]>; - -// UnaryOpM - Instructions that read "[mem]" and writes "[mem]". -class UnaryOpM<bits<8> o, Format f, string m, X86TypeInfo t, list<dag> p> - : ITy<o, f, t, (outs), (ins t.MemOperand:$dst), m, "$dst", p>, - Sched<[WriteALURMW]> { - let mayLoad = 1; - let mayStore = 1; +class MulDivOpM<bits<8> o, Format f, string m, X86TypeInfo t, + X86FoldableSchedWrite sched, list<dag> p> + : UnaryOpM<o, f, m, "$src1", t, (outs), p> { + let SchedRW = + [sched.Folded, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + sched.ReadAfterFold, sched.ReadAfterFold]; } -// INCDECR - Instructions like "inc reg". -class INCDECR<Format f, string m, X86TypeInfo t, SDPatternOperator node> - : UnaryOpR<0xFF, f, m, t, - [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, 1))]>, - DefEFLAGS { - let isConvertibleToThreeAddress = 1; // Can xform into LEA. +multiclass Mul<bits<8> o, string m, Format RegMRM, Format MemMRM, SDPatternOperator node> { + // AL is really implied by AX, but the registers in Defs must match the + // SDNode results (i8, i32). 
+ // + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. + // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. + let Defs = [AL,EFLAGS,AX], Uses = [AL] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, GR8:$src1)), (implicit EFLAGS)]>; + let Defs = [AX,DX,EFLAGS], Uses = [AX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, WriteIMul64, []>; + let Defs = [AL,EFLAGS,AX], Uses = [AL] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, WriteIMul8, + [(set AL, (node AL, (loadi8 addr:$src1))), (implicit EFLAGS)]>; + let Defs = [AX,DX,EFLAGS], Uses = [AX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, WriteIMul16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, WriteIMul32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, WriteIMul64, []>, Requires<[In64BitMode]>; } -// INCDECM - Instructions like "inc [mem]". -class INCDECM<Format f, string m, X86TypeInfo t, int num> - : UnaryOpM<0xFF, f, m, t, - [(store (add (t.LoadNode addr:$dst), num), addr:$dst), - (implicit EFLAGS)]>, DefEFLAGS; - -// INCDECR_ALT - Instructions like "inc reg" short forms. -class INCDECR_ALT<bits<8> o, string m, X86TypeInfo t> - : UnaryOpR<o, AddRegFrm, m, t, []>, DefEFLAGS { - // Short forms only valid in 32-bit mode. Selected during MCInst lowering. - let Predicates = [Not64BitMode]; +defm MUL : Mul<0xF7, "mul", MRM4r, MRM4m, mul>; +defm IMUL : Mul<0xF7, "imul", MRM5r, MRM5m, null_frag>; + +multiclass Div<bits<8> o, string m, Format RegMRM, Format MemMRM> { + defvar sched8 = !if(!eq(m, "div"), WriteDiv8, WriteIDiv8); + defvar sched16 = !if(!eq(m, "div"), WriteDiv16, WriteIDiv16); + defvar sched32 = !if(!eq(m, "div"), WriteDiv32, WriteIDiv32); + defvar sched64 = !if(!eq(m, "div"), WriteDiv64, WriteIDiv64); + let Defs = [AL,AH,EFLAGS], Uses = [AX] in + def 8r : MulDivOpR<o, RegMRM, m, Xi8, sched8, []>; + let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in + def 16r : MulDivOpR<o, RegMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in + def 32r : MulDivOpR<o, RegMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in + def 64r : MulDivOpR<o, RegMRM, m, Xi64, sched64, []>; + let Defs = [AL,AH,EFLAGS], Uses = [AX] in + def 8m : MulDivOpM<o, MemMRM, m, Xi8, sched8, []>; + let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in + def 16m : MulDivOpM<o, MemMRM, m, Xi16, sched16, []>, OpSize16; + let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in + def 32m : MulDivOpM<o, MemMRM, m, Xi32, sched32, []>, OpSize32; + let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in + def 64m : MulDivOpM<o, MemMRM, m, Xi64, sched64, []>, Requires<[In64BitMode]>; } - -// MulOpR - Instructions like "mul reg". -class MulOpR<bits<8> o, Format f, string m, X86TypeInfo t, - X86FoldableSchedWrite sched, list<dag> p> - : ITy<o, f, t, (outs), (ins t.RegClass:$src), m, "$src", p>, Sched<[sched]>; - -// MulOpM - Instructions like "mul [mem]". 
-class MulOpM<bits<8> o, Format f, string m, X86TypeInfo t, - X86FoldableSchedWrite sched, list<dag> p> - : ITy<o, f, t, (outs), (ins t.MemOperand:$src), m, - "$src", p>, SchedLoadReg<sched> { - let mayLoad = 1; +let hasSideEffects = 1 in { // so that we don't speculatively execute +defm DIV: Div<0xF7, "div", MRM6r, MRM6m>; +defm IDIV: Div<0xF7, "idiv", MRM7r, MRM7m>; } -// NegOpR - Instructions like "neg reg". -class NegOpR<bits<8> o, string m, X86TypeInfo t> - : UnaryOpR<o, MRM3r, m, t, - [(set t.RegClass:$dst, (ineg t.RegClass:$src1)), - (implicit EFLAGS)]>, DefEFLAGS; - -// NegOpM - Instructions like "neg [mem]". -class NegOpM<bits<8> o, string m, X86TypeInfo t> - : UnaryOpM<o, MRM3m, m, t, - [(store (ineg (t.LoadNode addr:$dst)), addr:$dst), - (implicit EFLAGS)]>, DefEFLAGS; - -// NOTE: NOT does not set EFLAGS! -// NotOpR - Instructions like "not reg". -class NotOpR<bits<8> o, string m, X86TypeInfo t> - : UnaryOpR<o, MRM2r, m, t, [(set t.RegClass:$dst, (not t.RegClass:$src1))]>; - -// NotOpM - Instructions like "neg [mem]". -class NotOpM<bits<8> o, string m, X86TypeInfo t> - : UnaryOpM<o, MRM2m, m, t, - [(store (not (t.LoadNode addr:$dst)), addr:$dst)]>; - -// IMulOpRR - Instructions like "imul reg, reg, i8". -class IMulOpRR<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRR_RF<o, m, t, X86smul_flag>, TB { +class IMulOpRR<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRR_RF<0xAF, "imul", t, X86smul_flag>, TB { let Form = MRMSrcReg; let SchedRW = [sched]; // X = IMUL Y, Z --> X = IMUL Z, Y let isCommutable = 1; } - -// IMulOpRM - Instructions like "imul reg, reg, [mem]". -class IMulOpRM<bits<8> o, string m, X86TypeInfo t, X86FoldableSchedWrite sched> - : BinOpRM_RF<o, m, t, X86smul_flag>, TB { +class IMulOpRM<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRM_RF<0xAF, "imul", t, X86smul_flag>, TB { let Form = MRMSrcMem; let SchedRW = [sched.Folded, sched.ReadAfterFold]; } -// IMulOpRRI8 - Instructions like "imul reg, reg, i8". -class IMulOpRRI8<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst), - (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]>, DefEFLAGS { - let ImmT = Imm8; -} +def IMUL16rr : IMulOpRR<Xi16, WriteIMul16Reg>, OpSize16; +def IMUL32rr : IMulOpRR<Xi32, WriteIMul32Reg>, OpSize32; +def IMUL64rr : IMulOpRR<Xi64, WriteIMul64Reg>; +def IMUL16rm : IMulOpRM<Xi16, WriteIMul16Reg>, OpSize16; +def IMUL32rm : IMulOpRM<Xi32, WriteIMul32Reg>, OpSize32; +def IMUL64rm : IMulOpRM<Xi64, WriteIMul64Reg>; -// IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64". -class IMulOpRRI<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcReg, t, (outs t.RegClass:$dst), - (ins t.RegClass:$src1, t.ImmOperand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1, - t.ImmNoSuOperator:$src2))]>, - Sched<[sched]>, DefEFLAGS { - let ImmT = t.ImmEncoding; +class IMulOpRI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRI8<0x6B, "imul", binop_ndd_args, t, MRMSrcReg, + (outs t.RegClass:$dst)>, DefEFLAGS { + let SchedRW = [sched]; } - -// IMulOpRMI8 - Instructions like "imul reg, [mem], i8". 
-class IMulOpRMI8<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst), - (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]>, +class IMulOpRI_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpRI<0x69, "imul", binop_ndd_args, t, MRMSrcReg, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (X86smul_flag t.RegClass:$src1, + t.ImmNoSuOperator:$src2))]>, DefEFLAGS { + let SchedRW = [sched]; +} +class IMulOpMI8_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpMI8<"imul", binop_ndd_args, t, MRMSrcMem, (outs t.RegClass:$dst)>, DefEFLAGS { - let ImmT = Imm8; - let mayLoad = 1; + let Opcode = 0x6B; + let SchedRW = [sched.Folded]; } - -// IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64". -class IMulOpRMI<bits<8> o, string m, X86TypeInfo t, - X86FoldableSchedWrite sched> - : ITy<o, MRMSrcMem, t, (outs t.RegClass:$dst), - (ins t.MemOperand:$src1, t.ImmOperand:$src2), m, - "{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set t.RegClass:$dst, EFLAGS, - (X86smul_flag (t.LoadNode addr:$src1), t.ImmNoSuOperator:$src2))]>, - Sched<[sched.Folded]>, DefEFLAGS { - let ImmT = t.ImmEncoding; +class IMulOpMI_R<X86TypeInfo t, X86FoldableSchedWrite sched> + : BinOpMI<0x69, "imul", binop_ndd_args, t, MRMSrcMem, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (X86smul_flag (t.LoadNode addr:$src1), + t.ImmNoSuOperator:$src2))]>, + DefEFLAGS { + let SchedRW = [sched.Folded]; } +def IMUL16rri8 : IMulOpRI8_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rri8 : IMulOpRI8_R<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rri8 : IMulOpRI8_R<Xi64, WriteIMul64Imm>; +def IMUL16rri : IMulOpRI_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rri : IMulOpRI_R<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rri32 : IMulOpRI_R<Xi64, WriteIMul64Imm>; + +def IMUL16rmi8 : IMulOpMI8_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rmi8 : IMulOpMI8_R<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rmi8 : IMulOpMI8_R<Xi64, WriteIMul64Imm>; +def IMUL16rmi : IMulOpMI_R<Xi16, WriteIMul16Imm>, OpSize16; +def IMUL32rmi : IMulOpMI_R<Xi32, WriteIMul32Imm>, OpSize32; +def IMUL64rmi32 : IMulOpMI_R<Xi64, WriteIMul64Imm>; -let Constraints = "$src1 = $dst" in { -def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>, OpSize16; -def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>, OpSize32; -def INC8r : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>; -def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>, OpSize16; -def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>, OpSize32; -def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>; - -def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>, OpSize16; -def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>, OpSize32; -def DEC8r : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>; -def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>, OpSize16; -def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>, OpSize32; -def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>; +//===----------------------------------------------------------------------===// +// INC and DEC Instructions +// +class IncOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM0r, "inc", t, null_frag> { + let Pattern = [(set t.RegClass:$dst, EFLAGS, + (X86add_flag_nocf t.RegClass:$src1, 1))]; +} +class DecOpR_RF<X86TypeInfo t> : UnaryOpR_RF<0xFF, MRM1r, "dec", t, null_frag> { + let Pattern = [(set t.RegClass:$dst, EFLAGS, + (X86sub_flag_nocf t.RegClass:$src1, 1))]; +} +class IncOpM_M<X86TypeInfo 
t> : UnaryOpM_MF<0xFF, MRM0m, "inc", t, null_frag> { + let Pattern = [(store (add (t.LoadNode addr:$src1), 1), addr:$src1), + (implicit EFLAGS)]; +} +class DecOpM_M<X86TypeInfo t> : UnaryOpM_MF<0xFF, MRM1m, "dec", t, null_frag> { + let Pattern = [(store (add (t.LoadNode addr:$src1), -1), addr:$src1), + (implicit EFLAGS)]; +} +// IncDec_Alt - Instructions like "inc reg" short forms. +// Short forms only valid in 32-bit mode. Selected during MCInst lowering. +class IncDec_Alt<bits<8> o, string m, X86TypeInfo t> + : UnaryOpR_RF<o, AddRegFrm, m, t, null_frag>, Requires<[Not64BitMode]>; + +let isConvertibleToThreeAddress = 1 in { +def INC16r_alt : IncDec_Alt<0x40, "inc", Xi16>, OpSize16; +def INC32r_alt : IncDec_Alt<0x40, "inc", Xi32>, OpSize32; +def DEC16r_alt : IncDec_Alt<0x48, "dec", Xi16>, OpSize16; +def DEC32r_alt : IncDec_Alt<0x48, "dec", Xi32>, OpSize32; +def INC8r : IncOpR_RF<Xi8>; +def INC16r : IncOpR_RF<Xi16>, OpSize16; +def INC32r : IncOpR_RF<Xi32>, OpSize32; +def INC64r : IncOpR_RF<Xi64>; +def DEC8r : DecOpR_RF<Xi8>; +def DEC16r : DecOpR_RF<Xi16>, OpSize16; +def DEC32r : DecOpR_RF<Xi32>, OpSize32; +def DEC64r : DecOpR_RF<Xi64>; } - let Predicates = [UseIncDec] in { -def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>; -def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>, OpSize16; -def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>, OpSize32; -def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>; -def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>, OpSize16; -def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>, OpSize32; +def INC8m : IncOpM_M<Xi8>; +def INC16m : IncOpM_M<Xi16>, OpSize16; +def INC32m : IncOpM_M<Xi32>, OpSize32; +def DEC8m : DecOpM_M<Xi8>; +def DEC16m : DecOpM_M<Xi16>, OpSize16; +def DEC32m : DecOpM_M<Xi32>, OpSize32; } let Predicates = [UseIncDec, In64BitMode] in { -def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>; -def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>; +def INC64m : IncOpM_M<Xi64>; +def DEC64m : DecOpM_M<Xi64>; } -// Extra precision multiplication - -// AL is really implied by AX, but the registers in Defs must match the -// SDNode results (i8, i32). -// AL,AH = AL*GR8 -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8, - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. - [(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]>; -// AX,DX = AX*GR16 -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*GR32 -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def MUL32r : MulOpR<0xF7, MRM4r, "mul", Xi32, WriteIMul32, - [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>, OpSize32; -// RAX,RDX = RAX*GR64 -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def MUL64r : MulOpR<0xF7, MRM4r, "mul", Xi64, WriteIMul64, - [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; -// AL,AH = AL*[mem8] -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8m : MulOpM<0xF6, MRM4m, "mul", Xi8, WriteIMul8, - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. 
- [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>; -// AX,DX = AX*[mem16] -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*[mem32] -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>, OpSize32; -// RAX,RDX = RAX*[mem64] -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def MUL64m : MulOpM<0xF7, MRM4m, "mul", Xi64, WriteIMul64, []>, - Requires<[In64BitMode]>; - -// AL,AH = AL*GR8 -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8r : MulOpR<0xF6, MRM5r, "imul", Xi8, WriteIMul8, []>; -// AX,DX = AX*GR16 -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*GR32 -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>, OpSize32; -// RAX,RDX = RAX*GR64 -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64r : MulOpR<0xF7, MRM5r, "imul", Xi64, WriteIMul64, []>; - -// AL,AH = AL*[mem8] -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8m : MulOpM<0xF6, MRM5m, "imul", Xi8, WriteIMul8, []>; -// AX,DX = AX*[mem16] -let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>, OpSize16; -// EAX,EDX = EAX*[mem32] -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>, OpSize32; -// RAX,RDX = RAX*[mem64] -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64m : MulOpM<0xF7, MRM5m, "imul", Xi64, WriteIMul64, []>, - Requires<[In64BitMode]>; - -let Constraints = "$src1 = $dst" in { -// Register-Register Signed Integer Multiply -def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rr : IMulOpRR<0xAF, "imul", Xi64, WriteIMul64Reg>; - -// Register-Memory Signed Integer Multiply -def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>, OpSize16; -def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>, OpSize32; -def IMUL64rm : IMulOpRM<0xAF, "imul", Xi64, WriteIMul64Reg>; +//===----------------------------------------------------------------------===// +// NEG and NOT Instructions +// +class NegOpR_R<X86TypeInfo t, bit ndd = 0> + : UnaryOpR_R<0xF7, MRM3r, "neg", t, ineg, ndd>; +class NegOpR_RF<X86TypeInfo t, bit ndd = 0> + : UnaryOpR_RF<0xF7, MRM3r, "neg", t, ineg, ndd>; +class NegOpM_M<X86TypeInfo t> : UnaryOpM_M<0xF7, MRM3m, "neg", t, null_frag>; +class NegOpM_MF<X86TypeInfo t> : UnaryOpM_MF<0xF7, MRM3m, "neg", t, ineg>; +class NegOpM_R<X86TypeInfo t> : UnaryOpM_R<0xF7, MRM3m, "neg", t, null_frag>; +class NegOpM_RF<X86TypeInfo t> : UnaryOpM_RF<0xF7, MRM3m, "neg", t, ineg>; + +class NotOpR_R<X86TypeInfo t, bit ndd = 0> + : UnaryOpR_R<0xF7, MRM2r, "not", t, not, ndd>; +class NotOpM_M<X86TypeInfo t> : UnaryOpM_M<0xF7, MRM2m, "not", t, not>; +class NotOpM_R<X86TypeInfo t> : UnaryOpM_R<0xF7, MRM2m, "not", t, not>; + +let Predicates = [NoNDD] in { +def NEG8r : NegOpR_RF<Xi8>; +def NEG16r : NegOpR_RF<Xi16>, OpSize16; +def NEG32r : NegOpR_RF<Xi32>, OpSize32; +def NEG64r : NegOpR_RF<Xi64>; +def NOT8r : NotOpR_R<Xi8>; +def NOT16r : NotOpR_R<Xi16>, OpSize16; +def NOT32r : NotOpR_R<Xi32>, OpSize32; +def NOT64r : NotOpR_R<Xi64>; } -// Surprisingly enough, these are not two address instructions! 
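
The NEG/NOT `_ND` definitions just below encode APX "new data destination" forms: the destination is a separate register carried in EVEX.VVVV instead of overwriting src1, and the `_NF` variants additionally suppress the EFLAGS update. A minimal, self-contained C++ sketch (illustrative names only, not LLVM's actual API) of the operand-index shift this implies, mirroring the emitVEXOpcodePrefix hunks earlier in this patch:

#include <cstdio>
#include <vector>

struct Prefix { unsigned VVVV = 0; };

// When IsND, operand 0 is the extra destination and goes into EVEX.VVVV,
// so every later operand index shifts by one; the VVVV slot must then not
// be consumed again for the usual VEX_4V first source.
unsigned emitPrefix(const std::vector<unsigned> &Ops, bool IsND,
                    bool HasVEX_4V, Prefix &P) {
  unsigned CurOp = 0;
  if (IsND)
    P.VVVV = Ops[CurOp++];        // NDD destination
  if (!IsND && HasVEX_4V)
    P.VVVV = Ops[CurOp++];        // classic first source
  return CurOp;                   // index of the ModRM.reg operand
}

int main() {
  Prefix P;
  std::vector<unsigned> Ops = {3 /*dst*/, 1 /*src1*/, 2 /*src2*/};
  unsigned ModRMOp = emitPrefix(Ops, /*IsND=*/true, /*HasVEX_4V=*/true, P);
  std::printf("VVVV=r%u, ModRM.reg at operand %u\n", P.VVVV, ModRMOp);
}
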
-// NOTE: These are order specific, we want the ri8 forms to be listed -// first so that they are slightly preferred to the ri forms. - -// Register-Integer Signed Integer Multiply -// GR16 = GR16*I8 -def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR16 = GR16*I16 -def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR32 = GR32*I8 -def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR32 = GR32*I32 -def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR64 = GR64*I8 -def IMUL64rri8 : IMulOpRRI8<0x6B, "imul", Xi64, WriteIMul64Imm>; -// GR64 = GR64*I32 -def IMUL64rri32 : IMulOpRRI<0x69, "imul", Xi64, WriteIMul64Imm>; - -// Memory-Integer Signed Integer Multiply -// GR16 = [mem16]*I8 -def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR16 = [mem16]*I16 -def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>, OpSize16; -// GR32 = [mem32]*I8 -def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR32 = [mem32]*I32 -def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>, OpSize32; -// GR64 = [mem64]*I8 -def IMUL64rmi8 : IMulOpRMI8<0x6B, "imul", Xi64, WriteIMul64Imm>; -// GR64 = [mem64]*I32 -def IMUL64rmi32 : IMulOpRMI<0x69, "imul", Xi64, WriteIMul64Imm>; - -// unsigned division/remainder -let hasSideEffects = 1 in { // so that we don't speculatively execute -let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/r8 = AL,AH -def DIV8r : MulOpR<0xF6, MRM6r, "div", Xi8, WriteDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/r16 = AX,DX -def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, []>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -// EDX:EAX/r32 = EAX,EDX -def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>, OpSize32; -// RDX:RAX/r64 = RAX,RDX -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in -def DIV64r : MulOpR<0xF7, MRM6r, "div", Xi64, WriteDiv64, []>; - -let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/[mem8] = AL,AH -def DIV8m : MulOpM<0xF6, MRM6m, "div", Xi8, WriteDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/[mem16] = AX,DX -def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX -def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>, OpSize32; -// RDX:RAX/[mem64] = RAX,RDX -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in -def DIV64m : MulOpM<0xF7, MRM6m, "div", Xi64, WriteDiv64, []>, - Requires<[In64BitMode]>; +let Predicates = [HasNDD, In64BitMode] in { +def NEG8r_ND : NegOpR_RF<Xi8, 1>; +def NEG16r_ND : NegOpR_RF<Xi16, 1>, PD; +def NEG32r_ND : NegOpR_RF<Xi32, 1>; +def NEG64r_ND : NegOpR_RF<Xi64, 1>; + +def NOT8r_ND : NotOpR_R<Xi8, 1>; +def NOT16r_ND : NotOpR_R<Xi16, 1>, PD; +def NOT32r_ND : NotOpR_R<Xi32, 1>; +def NOT64r_ND : NotOpR_R<Xi64, 1>; + +def NEG8r_NF_ND : NegOpR_R<Xi8, 1>, EVEX_NF; +def NEG16r_NF_ND : NegOpR_R<Xi16, 1>, EVEX_NF, PD; +def NEG32r_NF_ND : NegOpR_R<Xi32, 1>, EVEX_NF; +def NEG64r_NF_ND : NegOpR_R<Xi64, 1>, EVEX_NF; } -// Signed division/remainder. 
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/r8 = AL,AH -def IDIV8r : MulOpR<0xF6, MRM7r, "idiv", Xi8, WriteIDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/r16 = AX,DX -def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -// EDX:EAX/r32 = EAX,EDX -def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>, OpSize32; -// RDX:RAX/r64 = RAX,RDX -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in -def IDIV64r: MulOpR<0xF7, MRM7r, "idiv", Xi64, WriteIDiv64, []>; - -let Defs = [AL,AH,EFLAGS], Uses = [AX] in -// AX/[mem8] = AL,AH -def IDIV8m : MulOpM<0xF6, MRM7m, "idiv", Xi8, WriteIDiv8, []>; -let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -// DX:AX/[mem16] = AX,DX -def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>, OpSize16; -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -// EDX:EAX/[mem32] = EAX,EDX -def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>, OpSize32; -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX -// RDX:RAX/[mem64] = RAX,RDX -def IDIV64m: MulOpM<0xF7, MRM7m, "idiv", Xi64, WriteIDiv64, []>, - Requires<[In64BitMode]>; -} // hasSideEffects = 1 - -let Constraints = "$src1 = $dst" in { -def NEG8r : NegOpR<0xF6, "neg", Xi8>; -def NEG16r : NegOpR<0xF7, "neg", Xi16>, OpSize16; -def NEG32r : NegOpR<0xF7, "neg", Xi32>, OpSize32; -def NEG64r : NegOpR<0xF7, "neg", Xi64>; +def NEG8m : NegOpM_MF<Xi8>; +def NEG16m : NegOpM_MF<Xi16>, OpSize16; +def NEG32m : NegOpM_MF<Xi32>, OpSize32; +def NEG64m : NegOpM_MF<Xi64>, Requires<[In64BitMode]>; + +let Predicates = [HasNDD, In64BitMode] in { +def NEG8m_ND : NegOpM_RF<Xi8>; +def NEG16m_ND : NegOpM_RF<Xi16>, PD; +def NEG32m_ND : NegOpM_RF<Xi32>; +def NEG64m_ND : NegOpM_RF<Xi64>; + +def NEG8m_NF_ND : NegOpM_R<Xi8>, EVEX_NF; +def NEG16m_NF_ND : NegOpM_R<Xi16>, EVEX_NF, PD; +def NEG32m_NF_ND : NegOpM_R<Xi32>, EVEX_NF; +def NEG64m_NF_ND : NegOpM_R<Xi64>, EVEX_NF; } -def NEG8m : NegOpM<0xF6, "neg", Xi8>; -def NEG16m : NegOpM<0xF7, "neg", Xi16>, OpSize16; -def NEG32m : NegOpM<0xF7, "neg", Xi32>, OpSize32; -def NEG64m : NegOpM<0xF7, "neg", Xi64>, Requires<[In64BitMode]>; +def NOT8m : NotOpM_M<Xi8>; +def NOT16m : NotOpM_M<Xi16>, OpSize16; +def NOT32m : NotOpM_M<Xi32>, OpSize32; +def NOT64m : NotOpM_M<Xi64>, Requires<[In64BitMode]>; -let Constraints = "$src1 = $dst" in { -def NOT8r : NotOpR<0xF6, "not", Xi8>; -def NOT16r : NotOpR<0xF7, "not", Xi16>, OpSize16; -def NOT32r : NotOpR<0xF7, "not", Xi32>, OpSize32; -def NOT64r : NotOpR<0xF7, "not", Xi64>; +let Predicates = [HasNDD, In64BitMode] in { +def NOT8m_ND : NotOpM_R<Xi8>; +def NOT16m_ND : NotOpM_R<Xi16>, PD; +def NOT32m_ND : NotOpM_R<Xi32>; +def NOT64m_ND : NotOpM_R<Xi64>; } -def NOT8m : NotOpM<0xF6, "not", Xi8>; -def NOT16m : NotOpM<0xF7, "not", Xi16>, OpSize16; -def NOT32m : NotOpM<0xF7, "not", Xi32>, OpSize32; -def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>; +let Predicates = [In64BitMode], Pattern = [(null_frag)] in { +def NEG8r_NF : NegOpR_R<Xi8>, NF; +def NEG16r_NF : NegOpR_R<Xi16>, NF, PD; +def NEG32r_NF : NegOpR_R<Xi32>, NF; +def NEG64r_NF : NegOpR_R<Xi64>, NF; +def NEG8m_NF : NegOpM_M<Xi8>, NF; +def NEG16m_NF : NegOpM_M<Xi16>, NF, PD; +def NEG32m_NF : NegOpM_M<Xi32>, NF; +def NEG64m_NF : NegOpM_M<Xi64>, NF; + +def NEG8r_EVEX : NegOpR_RF<Xi8>, PL; +def NEG16r_EVEX : NegOpR_RF<Xi16>, PL, PD; +def NEG32r_EVEX : NegOpR_RF<Xi32>, PL; +def NEG64r_EVEX : NegOpR_RF<Xi64>, PL; + +def NOT8r_EVEX : NotOpR_R<Xi8>, PL; +def NOT16r_EVEX : 
NotOpR_R<Xi16>, PL, PD; +def NOT32r_EVEX : NotOpR_R<Xi32>, PL; +def NOT64r_EVEX : NotOpR_R<Xi64>, PL; + +def NEG8m_EVEX : NegOpM_MF<Xi8>, PL; +def NEG16m_EVEX : NegOpM_MF<Xi16>, PL, PD; +def NEG32m_EVEX : NegOpM_MF<Xi32>, PL; +def NEG64m_EVEX : NegOpM_MF<Xi64>, PL; + +def NOT8m_EVEX : NotOpM_M<Xi8>, PL; +def NOT16m_EVEX : NotOpM_M<Xi16>, PL, PD; +def NOT32m_EVEX : NotOpM_M<Xi32>, PL; +def NOT64m_EVEX : NotOpM_M<Xi64>, PL; +} /// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is /// defined with "(set GPR:$dst, EFLAGS, (...". @@ -640,61 +347,204 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, SDNode opnodeflag, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress, bit ConvertibleToThreeAddressRR> { - let Constraints = "$src1 = $dst" in { - let isCommutable = CommutableRR, - isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { - def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { + let Predicates = [NoNDD] in { + def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>, OpSize16; + def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>, OpSize32; + def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag, 1>; + def NAME#16rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag, 1>, PD; + def NAME#32rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag, 1>; + def NAME#64rr_ND : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag, 1>; + def NAME#8rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi8, 1>, EVEX_NF; + def NAME#16rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi16, 1>, EVEX_NF, PD; + def NAME#32rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi32, 1>, EVEX_NF; + def NAME#64rr_NF_ND : BinOpRR_R<BaseOpc, mnemonic, Xi64, 1>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#8rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi8>, NF; + def NAME#16rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi16>, NF, PD; + def NAME#32rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi32>, NF; + def NAME#64rr_NF : BinOpRR_R<BaseOpc, mnemonic, Xi64>, NF; + def NAME#8rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64rr_EVEX : BinOpRR_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; } + } - def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; - def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; - def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; - def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + def NAME#8rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; + def NAME#32rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; + def NAME#64rr_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>; + let Predicates = [In64BitMode] in { + def NAME#8rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def NAME#16rr_EVEX_REV : 
BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def NAME#32rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def NAME#64rr_EVEX_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + def NAME#8rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def NAME#16rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def NAME#32rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def NAME#64rr_ND_REV : BinOpRR_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def NAME#8rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8>, NF; + def NAME#16rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16>, NF, PD; + def NAME#32rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32>, NF; + def NAME#64rr_NF_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64>, NF; + def NAME#8rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def NAME#16rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def NAME#32rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def NAME#64rr_NF_ND_REV : BinOpRR_R_Rev<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + } - def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; - def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; - def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + let Predicates = [NoNDD] in { + def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>, OpSize16; + def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>, OpSize32; + def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag, 1>; + def NAME#16rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag, 1>, PD; + def NAME#32rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag, 1>; + def NAME#64rm_ND : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag, 1>; + def NAME#8rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi8, 1>, EVEX_NF; + def NAME#16rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi16, 1>, EVEX_NF, PD; + def NAME#32rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi32, 1>, EVEX_NF; + def NAME#64rm_NF_ND : BinOpRM_R<BaseOpc2, mnemonic, Xi64, 1>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#8rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi8>, NF; + def NAME#16rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi16>, NF, PD; + def NAME#32rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi32>, NF; + def NAME#64rm_NF : BinOpRM_R<BaseOpc2, mnemonic, Xi64>, NF; + def NAME#8rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , null_frag>, PL; + def NAME#16rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, null_frag>, PL; + def NAME#64rm_EVEX : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, null_frag>, PL; + } let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - // NOTE: These are order specific, we want the ri8 forms to be listed - // first so that they are slightly preferred to the ri forms. 
- def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; - - def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; - def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + let Predicates = [NoNDD] in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def NAME#16ri8 : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def NAME#32ri8 : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def NAME#64ri8 : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>; + def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def NAME#16ri : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM>, OpSize16; + def NAME#32ri : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM>, OpSize32; + def NAME#64ri32: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#16ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def NAME#32ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def NAME#64ri8_ND : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def NAME#8ri_ND : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM, 1>; + def NAME#16ri_ND : BinOpRI_RF<0x81, mnemonic, Xi16, opnodeflag, RegMRM, 1>, PD; + def NAME#32ri_ND : BinOpRI_RF<0x81, mnemonic, Xi32, opnodeflag, RegMRM, 1>; + def NAME#64ri32_ND: BinOpRI_RF<0x81, mnemonic, Xi64, opnodeflag, RegMRM, 1>; + def NAME#16ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def NAME#32ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def NAME#64ri8_NF_ND : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + def NAME#8ri_NF_ND : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM, 1>, EVEX_NF; + def NAME#16ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM, 1>, EVEX_NF, PD; + def NAME#32ri_NF_ND : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM, 1>, EVEX_NF; + def NAME#64ri32_NF_ND : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM, 1>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#16ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi16, RegMRM>, NF, PD; + def NAME#32ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi32, RegMRM>, NF; + def NAME#64ri8_NF : BinOpRI8_R<0x83, mnemonic, Xi64, RegMRM>, NF; + def NAME#8ri_NF : BinOpRI_R<0x80, mnemonic, Xi8, RegMRM>, NF; + def NAME#16ri_NF : BinOpRI_R<0x81, mnemonic, Xi16, RegMRM>, NF, PD; + def NAME#32ri_NF : BinOpRI_R<0x81, mnemonic, Xi32, RegMRM>, NF; + def NAME#64ri32_NF : BinOpRI_R<0x81, mnemonic, Xi64, RegMRM>, NF; + def NAME#16ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def NAME#32ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def NAME#64ri8_EVEX : BinOpRI8_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def NAME#8ri_EVEX : BinOpRI_RF<0x80, mnemonic, Xi8 , null_frag, RegMRM>, PL; + def NAME#16ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi16, null_frag, RegMRM>, PL, PD; + def NAME#32ri_EVEX : BinOpRI_RF<0x81, mnemonic, Xi32, null_frag, RegMRM>, PL; + def NAME#64ri32_EVEX: BinOpRI_RF<0x81, mnemonic, Xi64, null_frag, RegMRM>, PL; + } } - } // Constraints = "$src1 = $dst" - def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; - def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, 
OpSize32; - def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8mr : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32mr : BinOpMR_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64mr : BinOpMR_MF<BaseOpc, mnemonic, Xi64, opnode>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def NAME#32mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr_ND : BinOpMR_RF<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi8>, EVEX_NF; + def NAME#16mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi16>, EVEX_NF, PD; + def NAME#32mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi32>, EVEX_NF; + def NAME#64mr_NF_ND : BinOpMR_R<BaseOpc, mnemonic, Xi64>, EVEX_NF; + } + let Predicates = [In64BitMode] in { + def NAME#8mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi8>, NF; + def NAME#16mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi16>, NF, PD; + def NAME#32mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi32>, NF; + def NAME#64mr_NF : BinOpMR_M<BaseOpc, mnemonic, Xi64>, NF; + def NAME#8mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64mr_EVEX : BinOpMR_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. def NAME#16mi8 : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, OpSize16; def NAME#32mi8 : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; - + def NAME#64mi8 : BinOpMI8_MF<mnemonic, Xi64, MemMRM>; def NAME#8mi : BinOpMI_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; def NAME#16mi : BinOpMI_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; def NAME#32mi : BinOpMI_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def NAME#64mi32 : BinOpMI_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#16mi8_ND : BinOpMI8_RF<mnemonic, Xi16, MemMRM>, PD; + def NAME#32mi8_ND : BinOpMI8_RF<mnemonic, Xi32, MemMRM>; + def NAME#64mi8_ND : BinOpMI8_RF<mnemonic, Xi64, MemMRM>; + def NAME#8mi_ND : BinOpMI_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi_ND : BinOpMI_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def NAME#32mi_ND : BinOpMI_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi32_ND : BinOpMI_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def NAME#16mi8_NF_ND : BinOpMI8_R<mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi8_NF_ND : BinOpMI8_R<mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi8_NF_ND : BinOpMI8_R<mnemonic, Xi64, MemMRM>, NF; + def NAME#8mi_NF_ND : BinOpMI_R<0x80, mnemonic, Xi8, MemMRM>, NF; + def NAME#16mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi_NF_ND : BinOpMI_R<0x81, mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi32_NF_ND : BinOpMI_R<0x81, mnemonic, Xi64, MemMRM>, NF; + } + let Predicates = [In64BitMode] in { + def NAME#16mi8_NF : BinOpMI8_M<mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi8_NF : BinOpMI8_M<mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi8_NF : BinOpMI8_M<mnemonic, Xi64, MemMRM>, 
NF; + def NAME#8mi_NF : BinOpMI_M<0x80, mnemonic, Xi8, MemMRM>, NF; + def NAME#16mi_NF : BinOpMI_M<0x81, mnemonic, Xi16, MemMRM>, NF, PD; + def NAME#32mi_NF : BinOpMI_M<0x81, mnemonic, Xi32, MemMRM>, NF; + def NAME#64mi32_NF : BinOpMI_M<0x81, mnemonic, Xi64, MemMRM>, NF; + def NAME#16mi8_EVEX : BinOpMI8_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def NAME#32mi8_EVEX : BinOpMI8_MF<mnemonic, Xi32, MemMRM>, PL; + def NAME#64mi8_EVEX : BinOpMI8_MF<mnemonic, Xi64, MemMRM>, PL; + def NAME#8mi_EVEX : BinOpMI_MF<0x80, mnemonic, Xi8 , null_frag, MemMRM>, PL; + def NAME#16mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi16, null_frag, MemMRM>, PL, PD; + def NAME#32mi_EVEX : BinOpMI_MF<0x81, mnemonic, Xi32, null_frag, MemMRM>, PL; + def NAME#64mi32_EVEX : BinOpMI_MF<0x81, mnemonic, Xi64, null_frag, MemMRM>, PL; + } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. let Predicates = [Not64BitMode] in { - let Constraints = "$src1 = $dst" in def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; def NAME#8mi8 : BinOpMI8_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } @@ -719,62 +569,153 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, string mnemonic, Format RegMRM, Format MemMRM, SDNode opnode, bit CommutableRR, bit ConvertibleToThreeAddress> { - let Constraints = "$src1 = $dst" in { - let isCommutable = CommutableRR in { - def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + let isCommutable = CommutableRR in { + let Predicates = [NoNDD] in { + def NAME#8rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; - } // isConvertibleToThreeAddress + def NAME#16rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rr : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + } + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , opnode, 1>; + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def NAME#16rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, opnode, 1>, PD; + def NAME#32rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, opnode, 1>; + def NAME#64rr_ND : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, opnode, 1>; + } + } } // isCommutable + let Predicates = [In64BitMode] in { + def NAME#8rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64rr_EVEX : BinOpRRF_RF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + } + def NAME#8rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>; def NAME#16rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, OpSize16; def NAME#32rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, OpSize32; def NAME#64rr_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>; + let Predicates = [In64BitMode] in { + def NAME#8rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8, 1>; + def NAME#16rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16, 1>, PD; + def NAME#32rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32, 1>; + def NAME#64rr_ND_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64, 1>; + def NAME#8rr_EVEX_REV : 
BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi8>, PL; + def NAME#16rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi16>, PL, PD; + def NAME#32rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi32>, PL; + def NAME#64rr_EVEX_REV : BinOpRRF_RF_Rev<BaseOpc2, mnemonic, Xi64>, PL; + } + + let Predicates = [NoNDD] in { + def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; + def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; + def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; + def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + } + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode, 1>; + def NAME#16rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode, 1>, PD; + def NAME#32rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode, 1>; + def NAME#64rm_ND : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode, 1>; + } + let Predicates = [In64BitMode] in { + def NAME#8rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>, PL; + def NAME#16rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, PL, PD; + def NAME#32rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, PL; + def NAME#64rm_EVEX : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>, PL; + } - def NAME#8rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi8 , opnode>; - def NAME#16rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi16, opnode>, OpSize16; - def NAME#32rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi32, opnode>, OpSize32; - def NAME#64rm : BinOpRMF_RF<BaseOpc2, mnemonic, Xi64, opnode>; + let Predicates = [NoNDD] in { + def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; + def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; + def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; + + def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; + def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; + def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + } + } - def NAME#8ri : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8ri_ND : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM, 1>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - // NOTE: These are order specific, we want the ri8 forms to be listed - // first so that they are slightly preferred to the ri forms. 
- def NAME#16ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, OpSize16; - def NAME#32ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, OpSize32; - def NAME#64ri8 : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>; - - def NAME#16ri : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, OpSize16; - def NAME#32ri : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, OpSize32; - def NAME#64ri32: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>; + def NAME#16ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM, 1>, PD; + def NAME#32ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM, 1>; + def NAME#64ri8_ND : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM, 1>; + def NAME#16ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM, 1>, PD; + def NAME#32ri_ND : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM, 1>; + def NAME#64ri32_ND: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM, 1>; } - } // Constraints = "$src1 = $dst" + } + let Predicates = [In64BitMode] in { + def NAME#8ri_EVEX : BinOpRIF_RF<0x80, mnemonic, Xi8 , opnode, RegMRM>, PL; + def NAME#16ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi16, RegMRM>, PL, PD; + def NAME#32ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi32, RegMRM>, PL; + def NAME#64ri8_EVEX : BinOpRI8F_RF<0x83, mnemonic, Xi64, RegMRM>, PL; + def NAME#16ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi16, opnode, RegMRM>, PL, PD; + def NAME#32ri_EVEX : BinOpRIF_RF<0x81, mnemonic, Xi32, opnode, RegMRM>, PL; + def NAME#64ri32_EVEX: BinOpRIF_RF<0x81, mnemonic, Xi64, opnode, RegMRM>, PL; + } def NAME#8mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , opnode>; def NAME#16mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, opnode>, OpSize16; def NAME#32mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, opnode>, OpSize32; def NAME#64mr : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, opnode>; + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi16, opnode>, PD; + def NAME#32mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr_ND : BinOpMRF_RF<BaseOpc, mnemonic, Xi64, opnode>; + } + let Predicates = [In64BitMode] in { + def NAME#8mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi8 , null_frag>, PL; + def NAME#16mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi16, null_frag>, PL, PD; + def NAME#32mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi32, null_frag>, PL; + def NAME#64mr_EVEX : BinOpMRF_MF<BaseOpc, mnemonic, Xi64, null_frag>, PL; + } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
+ def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; def NAME#16mi8 : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, OpSize16; def NAME#32mi8 : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; - - def NAME#8mi : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#64mi8 : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>; def NAME#16mi : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, OpSize16; def NAME#32mi : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, OpSize32; let Predicates = [In64BitMode] in - def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + def NAME#64mi32 : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>; + + let Predicates = [HasNDD, In64BitMode] in { + def NAME#8mi_ND : BinOpMIF_RF<0x80, mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi8_ND : BinOpMI8F_RF<mnemonic, Xi16, MemMRM>, PD; + def NAME#32mi8_ND : BinOpMI8F_RF<mnemonic, Xi32, MemMRM>; + def NAME#64mi8_ND : BinOpMI8F_RF<mnemonic, Xi64, MemMRM>; + def NAME#16mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi16, opnode, MemMRM>, PD; + def NAME#32mi_ND : BinOpMIF_RF<0x81, mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi32_ND : BinOpMIF_RF<0x81, mnemonic, Xi64, opnode, MemMRM>; + } + let Predicates = [In64BitMode] in { + def NAME#8mi_EVEX : BinOpMIF_MF<0x80, mnemonic, Xi8 , opnode, MemMRM>, PL; + def NAME#16mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi16, MemMRM>, PL, PD; + def NAME#32mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi32, MemMRM>, PL; + def NAME#64mi8_EVEX : BinOpMI8F_MF<mnemonic, Xi64, MemMRM>, PL; + def NAME#16mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi16, opnode, MemMRM>, PL, PD; + def NAME#32mi_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi32, opnode, MemMRM>, PL; + def NAME#64mi32_EVEX : BinOpMIF_MF<0x81, mnemonic, Xi64, opnode, MemMRM>, PL; + } // These are for the disassembler since 0x82 opcode behaves like 0x80, but // not in 64-bit mode. 
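To make the 0x82 note above concrete: opcode 0x82 is a legacy alias that decoders fold onto 0x80 outside 64-bit mode and reject in 64-bit mode. A minimal C++ sketch of that rule (function and names invented for illustration, not from this patch):

#include <cstdint>

// Group-1 imm8 opcodes: outside 64-bit mode, 0x82 decodes exactly like 0x80.
// (In 64-bit mode 0x82 is invalid and would be rejected before this point.)
uint8_t canonicalizeGroup1(uint8_t opcode, bool is64BitMode) {
  if (opcode == 0x82 && !is64BitMode)
    return 0x80;
  return opcode;
}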
let Predicates = [Not64BitMode] in { - let Constraints = "$src1 = $dst" in def NAME#8ri8 : BinOpRI8F_RF<0x82, mnemonic, Xi8, RegMRM>, DisassembleOnly; def NAME#8mi8 : BinOpMI8F_MF<mnemonic, Xi8, MemMRM>, DisassembleOnly; } @@ -1089,36 +1030,30 @@ def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2), //===----------------------------------------------------------------------===// // ANDN Instruction // -multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, - PatFrag ld_frag, X86FoldableSchedWrite sched> { -let Predicates = [HasBMI, NoEGPR] in { - def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - VEX, VVVV, Sched<[sched]>; - def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - VEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; -} -let Predicates = [HasBMI, HasEGPR, In64BitMode] in { - def rr_EVEX : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))]>, - EVEX, VVVV, Sched<[sched]>; - def rm_EVEX : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), - !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; -} +multiclass AndN<X86TypeInfo t, string suffix> { + defvar andn_rr_p = + [(set t.RegClass:$dst, EFLAGS, (X86and_flag (not t.RegClass:$src1), + t.RegClass:$src2))]; + defvar andn_rm_p = + [(set t.RegClass:$dst, EFLAGS, (X86and_flag (not t.RegClass:$src1), + (t.LoadNode addr:$src2)))]; + def rr#suffix : ITy<0xF2, MRMSrcReg, t, (outs t.RegClass:$dst), + (ins t.RegClass:$src1, t.RegClass:$src2), "andn", + binop_ndd_args, andn_rr_p>, VVVV, Sched<[WriteALU]>, + T8, DefEFLAGS; + def rm#suffix : ITy<0xF2, MRMSrcMem, t, (outs t.RegClass:$dst), + (ins t.RegClass:$src1, t.MemOperand:$src2), "andn", + binop_ndd_args, andn_rm_p>, VVVV, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, + T8, DefEFLAGS; } // Complexity is reduced to give and with immediate a chance to match first. 
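Before the complexity-adjusted instantiations that follow, it may help to spell out what the andn_rr_p pattern above selects: ANDN computes dst = ~src1 & src2. A minimal C++ sketch of the value computation (illustrative only; flag effects are noted in the comment):

#include <cstdint>

// Matches (X86and_flag (not $src1), $src2): the first source is inverted and
// ANDed with the second. Hardware also sets ZF/SF from the result and clears
// CF/OF; only the data result is modeled here.
uint64_t andn(uint64_t src1, uint64_t src2) {
  return ~src1 & src2;
}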
-let Defs = [EFLAGS], AddedComplexity = -6 in { - defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8; - defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8, REX_W; +let AddedComplexity = -6 in { +defm ANDN32 : AndN<Xi32, "">, VEX, Requires<[HasBMI, NoEGPR]>; +defm ANDN64 : AndN<Xi64, "">, VEX, REX_W, Requires<[HasBMI, NoEGPR]>; +defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]>; +defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>; } let Predicates = [HasBMI], AddedComplexity = -6 in { @@ -1135,78 +1070,63 @@ let Predicates = [HasBMI], AddedComplexity = -6 in { //===----------------------------------------------------------------------===// // MULX Instruction // -multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop, - X86FoldableSchedWrite sched> { -let hasSideEffects = 0 in { -let Predicates = [HasBMI2, NoEGPR] in { - def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, VEX, VVVV, Sched<[WriteIMulH, sched]>; - +multiclass MulX<X86TypeInfo t, X86FoldableSchedWrite sched> { + defvar mulx_args = "{$src, $dst2, $dst1|$dst1, $dst2, $src}"; + defvar mulx_rm_sched = + [WriteIMulHLd, sched.Folded, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Implicit read of EDX/RDX + sched.ReadAfterFold]; + + def rr : ITy<0xF6, MRMSrcReg, t, (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.RegClass:$src), "mulx", mulx_args, []>, T8, XD, VEX, + VVVV, Sched<[WriteIMulH, sched]>; let mayLoad = 1 in - def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, VEX, VVVV, - Sched<[WriteIMulHLd, sched.Folded, - // Memory operand. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // Implicit read of EDX/RDX - sched.ReadAfterFold]>; - + def rm : ITy<0xF6, MRMSrcMem, t, (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.MemOperand:$src), "mulx", mulx_args, []>, T8, XD, VEX, + VVVV, Sched<mulx_rm_sched>; + + let Predicates = [In64BitMode] in { + def rr_EVEX : ITy<0xF6, MRMSrcReg, t, + (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.RegClass:$src), "mulx", mulx_args, []>, T8, XD, + EVEX, VVVV, Sched<[WriteIMulH, sched]>; + let mayLoad = 1 in + def rm_EVEX : ITy<0xF6, MRMSrcMem, t, + (outs t.RegClass:$dst1, t.RegClass:$dst2), + (ins t.MemOperand:$src), "mulx", mulx_args, []>, T8, XD, + EVEX, VVVV, Sched<mulx_rm_sched>; + } // Pseudo instructions to be used when the low result isn't used. The // instruction is defined to keep the high if both destinations are the same. 
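For reference, the operation modeled by the MulX multiclass above: MULX multiplies its explicit source by the implicit EDX/RDX operand, producing a double-width result split across two destinations, and leaves EFLAGS alone. A 32-bit C++ sketch (the high/low destination mapping follows the Intel manual; treat the names as illustrative):

#include <cstdint>

// mulx dst1, dst2, src with implicit EDX: dst1 receives the high half of the
// widening product and dst2 the low half; no flags are modified.
void mulx32(uint32_t edx, uint32_t src, uint32_t &dst1, uint32_t &dst2) {
  uint64_t product = uint64_t(edx) * src;
  dst1 = uint32_t(product >> 32); // high half
  dst2 = uint32_t(product);       // low half
}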
- def Hrr : PseudoI<(outs RC:$dst), (ins RC:$src), - []>, Sched<[sched]>; - + def Hrr : PseudoI<(outs t.RegClass:$dst), (ins t.RegClass:$src), []>, + Sched<[sched]>; let mayLoad = 1 in - def Hrm : PseudoI<(outs RC:$dst), (ins x86memop:$src), - []>, Sched<[sched.Folded]>; -} -let Predicates = [HasBMI2, HasEGPR, In64BitMode] in - def rr#_EVEX : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, EVEX, VVVV, Sched<[WriteIMulH, sched]>; -let Predicates = [HasBMI2, HasEGPR, In64BitMode], mayLoad = 1 in - def rm#_EVEX : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), - !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8, XD, EVEX, VVVV, - Sched<[WriteIMulHLd, sched.Folded, - // Memory operand. - ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, - // Implicit read of EDX/RDX - sched.ReadAfterFold]>; -} + def Hrm : PseudoI<(outs t.RegClass:$dst), (ins t.MemOperand:$src), []>, + Sched<[sched.Folded]>; } let Uses = [EDX] in - defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>; +defm MULX32 : MulX<Xi32, WriteMULX32>; + let Uses = [RDX] in - defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W; +defm MULX64 : MulX<Xi64, WriteMULX64>, REX_W; //===----------------------------------------------------------------------===// // ADCX and ADOX Instructions // // We don't have patterns for these as there is no advantage over ADC for // most code. -class ADCOXOpRR <string m, X86TypeInfo t> - : BinOpRRF_RF<0xF6, m, t, null_frag> { - let Form = MRMSrcReg; - let isCommutable = 1; +let Form = MRMSrcReg in { +def ADCX32rr : BinOpRRF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; +def ADCX64rr : BinOpRRF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; +def ADOX32rr : BinOpRRF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; +def ADOX64rr : BinOpRRF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; } - -class ADCOXOpRM <string m, X86TypeInfo t> - : BinOpRMF_RF<0xF6, m, t, null_frag> { - let Form = MRMSrcMem; -} - -let OpSize = OpSizeFixed, Constraints = "$src1 = $dst", - Predicates = [HasADX] in { -def ADCX32rr : ADCOXOpRR<"adcx", Xi32>, T8, PD; -def ADCX64rr : ADCOXOpRR<"adcx", Xi64>, T8, PD; -def ADOX32rr : ADCOXOpRR<"adox", Xi32>, T8, XS; -def ADOX64rr : ADCOXOpRR<"adox", Xi64>, T8, XS; -def ADCX32rm : ADCOXOpRM<"adcx", Xi32>, T8, PD; -def ADCX64rm : ADCOXOpRM<"adcx", Xi64>, T8, PD; -def ADOX32rm : ADCOXOpRM<"adox", Xi32>, T8, XS; -def ADOX64rm : ADCOXOpRM<"adox", Xi64>, T8, XS; +let Form = MRMSrcMem in { +def ADCX32rm : BinOpRMF_RF<0xF6, "adcx", Xi32, null_frag>, T8, PD; +def ADCX64rm : BinOpRMF_RF<0xF6, "adcx", Xi64, null_frag>, T8, PD; +def ADOX32rm : BinOpRMF_RF<0xF6, "adox", Xi32, null_frag>, T8, XS; +def ADOX64rm : BinOpRMF_RF<0xF6, "adox", Xi64, null_frag>, T8, XS; } diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 07e5576960d6..6e76b44b66a3 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -256,6 +256,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field? bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field? bit hasEVEX_B = 0; // Does this inst set the EVEX_B field? + bit hasEVEX_NF = 0; // Does this inst set the EVEX_NF field? bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width. 
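Stepping back to the ADCX/ADOX definitions above: both are add-with-carry, but ADCX reads and writes only CF while ADOX uses only OF, so two multi-word carry chains can be interleaved without serializing on a single flag. A portable C++ sketch of one limb step (illustrative, not the codegen pattern):

#include <cstdint>

// One limb of an add-with-carry chain; `carry` stands in for CF (ADCX) or
// OF (ADOX). Keeping the two chains in distinct flags lets them interleave.
uint64_t addCarryStep(uint64_t a, uint64_t b, unsigned &carry) {
  uint64_t sum = a + b + carry;
  carry = (sum < a) || (carry && sum == a); // carry-out of this limb
  return sum;
}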
// Declare it int rather than bits<4> so that all bits are defined when // assigning to bits<7>. @@ -309,4 +310,5 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{48} = hasEVEX_RC; let TSFlags{49} = hasNoTrackPrefix; let TSFlags{51-50} = explicitOpPrefixBits; + let TSFlags{52} = hasEVEX_NF; } diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 8653f15d8602..94fa6e45ded9 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -10,6 +10,8 @@ def TruePredicate : Predicate<"true">; def HasEGPR : Predicate<"Subtarget->hasEGPR()">; def NoEGPR : Predicate<"!Subtarget->hasEGPR()">; +def HasNDD : Predicate<"Subtarget->hasNDD()">; +def NoNDD : Predicate<"!Subtarget->hasNDD()">; def HasCMOV : Predicate<"Subtarget->canUseCMOV()">; def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">; def HasNOPL : Predicate<"Subtarget->hasNOPL()">; @@ -100,7 +102,6 @@ def HasIFMA : Predicate<"Subtarget->hasIFMA()">; def HasAVXIFMA : Predicate<"Subtarget->hasAVXIFMA()">; def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; -def HasADX : Predicate<"Subtarget->hasADX()">; def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasSHA512 : Predicate<"Subtarget->hasSHA512()">; def HasSGX : Predicate<"Subtarget->hasSGX()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index df1f0b5b4ca7..e8a1a2b83886 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6655,49 +6655,51 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { // SSE4.2 - CRC Instructions //===----------------------------------------------------------------------===// +// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly +// controlled by the SSE42 flag. +// // No CRC instructions have AVX equivalents -// crc intrinsic instruction -// This set of instructions are only rm, the only difference is the size -// of r and m. 
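The Crc32r/Crc32m templates defined just below surface to C++ as the usual SSE4.2 intrinsics. A small usage sketch (assumes an SSE4.2-capable build, for example with -msse4.2; the ~0u initial value and final inversion are the common CRC32C convention):

#include <cstddef>
#include <cstdint>
#include <nmmintrin.h>

// Byte-at-a-time CRC32C; each _mm_crc32_u8 call corresponds to the
// r32r8/r32m8 forms defined below.
uint32_t crc32c(const uint8_t *data, size_t len) {
  uint32_t crc = ~0u;
  for (size_t i = 0; i < len; ++i)
    crc = _mm_crc32_u8(crc, data[i]);
  return ~crc;
}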
-class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut, - RegisterClass RCIn, SDPatternOperator Int> : - CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2), - !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), - [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>, - Sched<[WriteCRC32]>; - -class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut, - X86MemOperand x86memop, SDPatternOperator Int> : - CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2), - !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), - [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>, - Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>; - -let Constraints = "$src1 = $dst" in { - def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, - int_x86_sse42_crc32_32_8>; - def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8, - int_x86_sse42_crc32_32_8>; - def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem, - int_x86_sse42_crc32_32_16>, OpSize16; - def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16, - int_x86_sse42_crc32_32_16>, OpSize16; - def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem, - int_x86_sse42_crc32_32_32>, OpSize32; - def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32, - int_x86_sse42_crc32_32_32>, OpSize32; - def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem, - int_x86_sse42_crc32_64_64>, REX_W; - def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64, - int_x86_sse42_crc32_64_64>, REX_W; - let hasSideEffects = 0 in { - let mayLoad = 1 in - def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem, - null_frag>, REX_W; - def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8, - null_frag>, REX_W; - } +class Crc32r<X86TypeInfo t, RegisterClass rc, SDPatternOperator node> + : ITy<0xF1, MRMSrcReg, t, (outs rc:$dst), (ins rc:$src1, t.RegClass:$src2), + "crc32", binop_args, [(set rc:$dst, (node rc:$src1, t.RegClass:$src2))]>, + Sched<[WriteCRC32]>, NoCD8 { + let Constraints = "$src1 = $dst"; +} + +class Crc32m<X86TypeInfo t, RegisterClass rc, SDPatternOperator node> + : ITy<0xF1, MRMSrcMem, t, (outs rc:$dst), (ins rc:$src1, t.MemOperand:$src2), + "crc32", binop_args, [(set rc:$dst, (node rc:$src1, (load addr:$src2)))]>, + Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>, NoCD8 { + let Constraints = "$src1 = $dst"; +} + +let Predicates = [HasCRC32, NoEGPR], OpMap = T8, OpPrefix = XD in { + def CRC32r32r8 : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32m8 : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32r16 : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16; + def CRC32r32m16 : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, OpSize16; + def CRC32r32r32 : Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32; + def CRC32r32m32 : Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>, OpSize32; + def CRC32r64r64 : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64m64 : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64r8 : Crc32r<Xi8, GR64, null_frag>, REX_W; + let mayLoad = 1 in + def CRC32r64m8 : Crc32m<Xi8, GR64, null_frag>, REX_W; +} + +let Predicates = [HasCRC32, HasEGPR, In64BitMode], OpMap = T_MAP4, OpEnc = EncEVEX in { + def CRC32r32r8_EVEX : Crc32r<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32m8_EVEX : Crc32m<Xi8, GR32, int_x86_sse42_crc32_32_8>; + def CRC32r32r16_EVEX : Crc32r<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD; + def CRC32r32m16_EVEX : Crc32m<Xi16, GR32, int_x86_sse42_crc32_32_16>, PD; + def CRC32r32r32_EVEX 
: Crc32r<Xi32, GR32, int_x86_sse42_crc32_32_32>; + def CRC32r32m32_EVEX : Crc32m<Xi32, GR32, int_x86_sse42_crc32_32_32>; + def CRC32r64r64_EVEX : Crc32r<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64m64_EVEX : Crc32m<Xi64, GR64, int_x86_sse42_crc32_64_64>; + def CRC32r64r8_EVEX : Crc32r<Xi8, GR64, null_frag>, REX_W; + let mayLoad = 1 in + def CRC32r64m8_EVEX : Crc32m<Xi8, GR64, null_frag>, REX_W; } //===----------------------------------------------------------------------===// @@ -7160,6 +7162,10 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128rm addr:$src)>; } +let Predicates = [HasAVXNECONVERT, NoVLX] in + def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF128rm addr:$src)>; + //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // @@ -7905,6 +7911,9 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; } +let Predicates = [HasAVXNECONVERT, NoVLX] in + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8bf16, v16bf16, loadv8bf16, loadv16bf16>; + //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values // @@ -7927,6 +7936,9 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; } +let Predicates = [HasAVXNECONVERT, NoVLX] in + defm : vextract_lowering<"VEXTRACTI128", v16bf16, v8bf16>; + //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores // diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index efb58c6102dd..699e5847e63f 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -446,11 +446,11 @@ let Predicates = [HasUSERMSR], mayLoad = 1 in { } let Predicates = [HasUSERMSR], mayStore = 1 in { def UWRMSRrr : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "uwrmsr\t{$src1, $src2|$src2, $src1}", + "uwrmsr\t{$src2, $src1|$src1, $src2}", [(int_x86_uwrmsr GR64:$src1, GR64:$src2)]>, T8, XS; def UWRMSRir : Ii32<0xf8, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm), "uwrmsr\t{$src, $imm|$imm, $src}", - [(int_x86_uwrmsr GR64:$src, i64immSExt32_su:$imm)]>, T_MAP7, XS, VEX; + [(int_x86_uwrmsr i64immSExt32_su:$imm, GR64:$src)]>, T_MAP7, XS, VEX; } let Defs = [RAX, RDX], Uses = [ECX] in def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; diff --git a/llvm/lib/Target/X86/X86InstrUtils.td b/llvm/lib/Target/X86/X86InstrUtils.td index 9499753143d9..da85922a018d 100644 --- a/llvm/lib/Target/X86/X86InstrUtils.td +++ b/llvm/lib/Target/X86/X86InstrUtils.td @@ -39,17 +39,19 @@ class PS { Prefix OpPrefix = PS; } class PD { Prefix OpPrefix = PD; } class XD { Prefix OpPrefix = XD; } class XS { Prefix OpPrefix = XS; } -class VEX { Encoding OpEnc = EncVEX; } +class XOP { Encoding OpEnc = EncXOP; } +class VEX { Encoding OpEnc = EncVEX; } +class EVEX { Encoding OpEnc = EncEVEX; } class WIG { bit IgnoresW = 1; } // Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX. 
class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class VVVV { bit hasVEX_4V = 1; }
-class EVEX { Encoding OpEnc = EncEVEX; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
+class EVEX_NF { bit hasEVEX_NF = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
@@ -63,7 +65,7 @@ class EVEX_CD8<int esize, CD8VForm form> {
bits<3> CD8_Form = form.Value;
}
class NoCD8 { bits<7> CD8_Scale = 0; }
-class XOP { Encoding OpEnc = EncXOP; }
+
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
@@ -99,16 +101,24 @@ class DisassembleOnly {
bit ForceDisassemble = 1;
}
-
-// SchedModel info for instruction that loads one value and gets the second
-// (and possibly third) value from a register.
-// This is used for instructions that put the memory operands before other
-// uses.
-class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
-  // Memory operand.
-  ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
-  // Register reads (implicit or explicit).
-  Sched.ReadAfterFold, Sched.ReadAfterFold]>;
+defvar unaryop_args = "$src1";
+defvar unaryop_ndd_args = "{$src1, $dst|$dst, $src1}";
+defvar binop_args = "{$src2, $src1|$src1, $src2}";
+defvar binop_ndd_args = "{$src2, $src1, $dst|$dst, $src1, $src2}";
+defvar tie_dst_src1 = "$src1 = $dst";
+
+// NDD - Helper for new data destination instructions
+class NDD<bit ndd> {
+  string Constraints = !if(!eq(ndd, 0), tie_dst_src1, "");
+  Encoding OpEnc = !if(!eq(ndd, 0), EncNormal, EncEVEX);
+  bit hasEVEX_B = ndd;
+  bit hasVEX_4V = ndd;
+  Map OpMap = !if(!eq(ndd, 0), OB, T_MAP4);
+}
+// NF - Helper for NF (no flags update) instructions
+class NF: T_MAP4, EVEX, EVEX_NF, NoCD8;
+// PL - Helper for promoted legacy instructions
+class PL: T_MAP4, EVEX, NoCD8, ExplicitEVEXPrefix;
//===----------------------------------------------------------------------===//
// X86 Type information definitions
@@ -723,13 +733,6 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, PD,
Requires<[UseSSE42]>;
-// CRC32I - SSE 4.2 CRC32 instructions.
-// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
-// controlled by the SSE42 flag.
-class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
-             list<dag> pattern>
-  : I<o, F, outs, ins, asm, pattern>, T8, XD, Requires<[HasCRC32]>;
-
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
@@ -957,15 +960,380 @@ class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
/// 2. Infers whether the instruction should have a 0x40 REX_W prefix.
/// 3. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, - string mnemonic, string args, list<dag> pattern> - : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, - opcode{3}, opcode{2}, opcode{1}, - !if(!eq(typeinfo.HasEvenOpcode, 1), 0, opcode{0})}, f, outs, ins, - !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> { - +class ITy<bits<8> o, Format f, X86TypeInfo t, dag outs, dag ins, string m, + string args, list<dag> p> + : I<{o{7}, o{6}, o{5}, o{4}, o{3}, o{2}, o{1}, + !if(!eq(t.HasEvenOpcode, 1), 0, o{0})}, f, outs, ins, + !strconcat(m, "{", t.InstrSuffix, "}\t", args), p> { let hasSideEffects = 0; - let hasREX_W = typeinfo.HasREX_W; + let hasREX_W = t.HasREX_W; } -defvar binop_args = "{$src2, $src1|$src1, $src2}"; +// BinOpRR - Instructions that read "reg, reg". +class BinOpRR<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p> + : ITy<o, MRMDestReg, t, out, (ins t.RegClass:$src1, t.RegClass:$src2), m, + args, p>, Sched<[WriteALU]>; +// BinOpRR_F - Instructions that read "reg, reg" and write EFLAGS only. +class BinOpRR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> + : BinOpRR<o, m, binop_args, t, (outs), + [(set EFLAGS, (node t.RegClass:$src1, t.RegClass:$src2))]>, + DefEFLAGS; +// BinOpRR_F_Rev - Reversed encoding of BinOpRR_F +class BinOpRR_F_Rev<bits<8> o, string m, X86TypeInfo t> + : BinOpRR_F<o, m, t, null_frag>, DisassembleOnly { + let Form = MRMSrcReg; +} +// BinOpRR_R - Instructions that read "reg, reg" and write "reg". +class BinOpRR_R<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, + (outs t.RegClass:$dst), []>, NDD<ndd>; +// BinOpRR_R_Rev - Reversed encoding of BinOpRR_R +class BinOpRR_R_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRR_R<o, m, t, ndd>, DisassembleOnly { + let Form = MRMSrcReg; +} +// BinOpRR_RF - Instructions that read "reg, reg", and write "reg", EFLAGS. +class BinOpRR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2))]>, DefEFLAGS, NDD<ndd>; +// BinOpRR_RF_Rev - Reversed encoding of BinOpRR_RF. +class BinOpRR_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRR_RF<o, m, t, null_frag, ndd>, DisassembleOnly { + let Form = MRMSrcReg; +} +// BinOpRRF_RF - Instructions that read "reg, reg", write "reg" and read/write +// EFLAGS. +class BinOpRRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRR<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.RegClass:$src2, + EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<ndd> { + let SchedRW = [WriteADC]; +} +// BinOpRRF_RF_Rev - Reversed encoding of BinOpRRF_RF +class BinOpRRF_RF_Rev<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRRF_RF<o, m, t, null_frag, ndd>, DisassembleOnly { + let Form = MRMSrcReg; +} + +// BinOpRM - Instructions that read "reg, [mem]". +class BinOpRM<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p> + : ITy<o, MRMSrcMem, t, out, (ins t.RegClass:$src1, t.MemOperand:$src2), m, + args, p>, + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]> { + let mayLoad = 1; +} +// BinOpRM_F - Instructions that read "reg, [mem]" and write EFLAGS only. 
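As a gloss on the opcode computation in the new ITy template above: x86 pairs byte and wider forms on adjacent opcodes, with the byte form on the even one, which is exactly what forcing the low bit via HasEvenOpcode expresses. The same computation as plain C++ (illustrative only):

#include <cstdint>

// Mirrors !if(!eq(t.HasEvenOpcode, 1), 0, o{0}): types whose instructions
// use the even (byte-form) opcode get bit 0 cleared; others keep it.
uint8_t effectiveOpcode(uint8_t o, bool hasEvenOpcode) {
  return hasEvenOpcode ? uint8_t(o & 0xFE) : o;
}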
+class BinOpRM_F<bits<8> o, string m, X86TypeInfo t, SDNode node> + : BinOpRM<o, m, binop_args, t, (outs), + [(set EFLAGS, (node t.RegClass:$src1, + (t.LoadNode addr:$src2)))]>, DefEFLAGS; +// BinOpRM_R - Instructions that read "reg, [mem]", and write "reg". +class BinOpRM_R<bits<8> o, string m, X86TypeInfo t, bit ndd = 0> + : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + []>, NDD<ndd>; +// BinOpRM_RF - Instructions that read "reg, [mem]", and write "reg", EFLAGS. +class BinOpRM_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, (node t.RegClass:$src1, + (t.LoadNode addr:$src2)))]>, DefEFLAGS, NDD<ndd>; +// BinOpRMF_RF - Instructions that read "reg, [mem]", write "reg" and read/write +// EFLAGS. +class BinOpRMF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, bit ndd = 0> + : BinOpRM<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, (t.LoadNode addr:$src2), EFLAGS))]>, + DefEFLAGS, UseEFLAGS, NDD<ndd> { + let SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold, + // base, scale, index, offset, segment. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // implicit register read. + WriteADC.ReadAfterFold]; +} + +// BinOpRI - Instructions that read "reg, imm". +class BinOpRI<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out, list<dag> p> + : ITy<o, f, t, out, (ins t.RegClass:$src1, t.ImmOperand:$src2), m, + args, p>, Sched<[WriteALU]> { + let ImmT = t.ImmEncoding; +} +// BinOpRI_F - Instructions that read "reg, imm" and write EFLAGS only. +class BinOpRI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, + Format f> + : BinOpRI<o, m, binop_args, t, f, (outs), + [(set EFLAGS, (node t.RegClass:$src1, + t.ImmOperator:$src2))]>, DefEFLAGS; +// BinOpRI_R - Instructions that read "reg, imm" and write "reg". +class BinOpRI_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0> + : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst), + []>, NDD<ndd>; +// BinOpRI_RF - Instructions that read "reg, imm" and write "reg", EFLAGS. +class BinOpRI_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, Format f, bit ndd = 0> + : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.ImmOperator:$src2))]>, DefEFLAGS, NDD<ndd>; +// BinOpRIF_RF - Instructions that read "reg, imm", write "reg" and read/write +// EFLAGS. +class BinOpRIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f, bit ndd = 0> + : BinOpRI<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst), + [(set t.RegClass:$dst, EFLAGS, + (node t.RegClass:$src1, t.ImmOperator:$src2, + EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<ndd> { + let SchedRW = [WriteADC]; +} +// BinOpRI8 - Instructions that read "reg, imm8". +class BinOpRI8<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out> + : ITy<o, f, t, out, (ins t.RegClass:$src1, t.Imm8Operand:$src2), m, + args, []>, Sched<[WriteALU]> { + let ImmT = Imm8; +} +// BinOpRI8_F - Instructions that read "reg, imm8" and write EFLAGS only. 
+class BinOpRI8_F<bits<8> o, string m, X86TypeInfo t, Format f>
+  : BinOpRI8<o, m, binop_args, t, f, (outs)>, DefEFLAGS;
+// BinOpRI8_R - Instructions that read "reg, imm8" and write "reg".
+class BinOpRI8_R<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
+  : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, NDD<ndd>;
+// BinOpRI8_RF - Instructions that read "reg, imm8" and write "reg", EFLAGS.
+class BinOpRI8_RF<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
+  : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, DefEFLAGS, NDD<ndd>;
+// BinOpRI8F_RF - Instructions that read "reg, imm8", write "reg" and read/write
+// EFLAGS.
+class BinOpRI8F_RF<bits<8> o, string m, X86TypeInfo t, Format f, bit ndd = 0>
+  : BinOpRI8<o, m, !if(!eq(ndd, 0), binop_args, binop_ndd_args), t, f, (outs t.RegClass:$dst)>, DefEFLAGS, UseEFLAGS, NDD<ndd> {
+  let SchedRW = [WriteADC];
+}
+
+// BinOpMR - Instructions that read "[mem], reg".
+class BinOpMR<bits<8> o, string m, string args, X86TypeInfo t, dag out, list<dag> p>
+  : ITy<o, MRMDestMem, t, out, (ins t.MemOperand:$src1, t.RegClass:$src2), m,
+        args, p> {
+  let mayLoad = 1;
+  let SchedRW = [WriteALU.Folded, WriteALU.ReadAfterFold];
+}
+// BinOpMR_R - Instructions that read "[mem], reg", and write "reg".
+class BinOpMR_R<bits<8> o, string m, X86TypeInfo t>
+  : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst), []>, NDD<1>;
+// BinOpMR_RF - Instructions that read "[mem], reg", and write "reg", EFLAGS.
+class BinOpMR_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst),
+            [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1),
+             t.RegClass:$src2))]>, DefEFLAGS, NDD<1>;
+// BinOpMR_F - Instructions that read "[mem], reg" and write EFLAGS only.
+class BinOpMR_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMR<o, m, binop_args, t, (outs),
+            [(set EFLAGS, (node (t.LoadNode addr:$src1), t.RegClass:$src2))]>,
+    Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+           ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>, DefEFLAGS;
+// BinOpMR_M - Instructions that read "[mem], reg" and write "[mem]".
+class BinOpMR_M<bits<8> o, string m, X86TypeInfo t>
+  : BinOpMR<o, m, binop_args, t, (outs), []>,
+    Sched<[WriteALURMW,
+           // base, scale, index, offset, segment
+           ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault]> {
+  let mayStore = 1;
+}
+// BinOpMR_MF - Instructions that read "[mem], reg" and write "[mem]", EFLAGS.
+class BinOpMR_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMR<o, m, binop_args, t, (outs),
+            [(store (node (load addr:$src1), t.RegClass:$src2), addr:$src1),
+             (implicit EFLAGS)]>,
+    Sched<[WriteALURMW,
+           // base, scale, index, offset, segment
+           ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+           WriteALU.ReadAfterFold]>, // reg
+    DefEFLAGS {
+  let mayStore = 1;
+}
+// BinOpMRF_RF - Instructions that read "[mem], reg", write "reg" and
+// read/write EFLAGS.
+class BinOpMRF_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMR<o, m, binop_ndd_args, t, (outs t.RegClass:$dst),
+            [(set t.RegClass:$dst, EFLAGS, (node (load addr:$src1),
+             t.RegClass:$src2, EFLAGS))]>, DefEFLAGS, UseEFLAGS, NDD<1>,
+    Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>;
+// BinOpMRF_MF - Instructions that read "[mem], reg", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMRF_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMR<o, m, binop_args, t, (outs),
+            [(store (node (load addr:$src1), t.RegClass:$src2, EFLAGS),
+             addr:$src1), (implicit EFLAGS)]>,
+    Sched<[WriteADCRMW,
+           // base, scale, index, offset, segment
+           ReadDefault, ReadDefault, ReadDefault,
+           ReadDefault, ReadDefault,
+           WriteALU.ReadAfterFold, // reg
+           WriteALU.ReadAfterFold]>, // EFLAGS
+    DefEFLAGS, UseEFLAGS {
+  let mayStore = 1;
+}
+
+// BinOpMI - Instructions that read "[mem], imm".
+class BinOpMI<bits<8> o, string m, string args, X86TypeInfo t, Format f, dag out, list<dag> p>
+  : ITy<o, f, t, out, (ins t.MemOperand:$src1, t.ImmOperand:$src2), m,
+        args, p> {
+  let ImmT = t.ImmEncoding;
+  let mayLoad = 1;
+}
+// BinOpMI_F - Instructions that read "[mem], imm" and write EFLAGS only.
+class BinOpMI_F<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+                Format f>
+  : BinOpMI<o, m, binop_args, t, f, (outs),
+            [(set EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>,
+    Sched<[WriteALU.Folded]>, DefEFLAGS;
+// BinOpMI_R - Instructions that read "[mem], imm" and write "reg".
+class BinOpMI_R<bits<8> o, string m, X86TypeInfo t, Format f>
+  : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst), []>,
+    Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
+// BinOpMI_RF - Instructions that read "[mem], imm" and write "reg", EFLAGS.
+class BinOpMI_RF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
+                 Format f>
+  : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst),
+            [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1), t.ImmOperator:$src2))]>,
+    Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>;
+// BinOpMI_M - Instructions that read "[mem], imm" and write "[mem]".
+class BinOpMI_M<bits<8> o, string m, X86TypeInfo t, Format f>
+  : BinOpMI<o, m, binop_args, t, f, (outs), []>, Sched<[WriteALURMW]> {
+  let mayStore = 1;
+}
+// BinOpMI_MF - Instructions that read "[mem], imm" and write "[mem]", EFLAGS.
+class BinOpMI_MF<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node, Format f>
+  : BinOpMI<o, m, binop_args, t, f, (outs),
+            [(store (node (t.VT (load addr:$src1)),
+             t.ImmOperator:$src2), addr:$src1), (implicit EFLAGS)]>,
+    Sched<[WriteALURMW]>, DefEFLAGS {
+  let mayStore = 1;
+}
+// BinOpMIF_RF - Instructions that read "[mem], imm", write "reg" and
+// read/write EFLAGS.
+class BinOpMIF_RF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+  : BinOpMI<o, m, binop_ndd_args, t, f, (outs t.RegClass:$dst),
+            [(set t.RegClass:$dst, EFLAGS, (node (t.VT (load addr:$src1)),
+             t.ImmOperator:$src2, EFLAGS))]>,
+    Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>, DefEFLAGS, UseEFLAGS, NDD<1>;
+// BinOpMIF_MF - Instructions that read "[mem], imm", write "[mem]" and
+// read/write EFLAGS.
+class BinOpMIF_MF<bits<8> o, string m, X86TypeInfo t, SDNode node, Format f>
+  : BinOpMI<o, m, binop_args, t, f, (outs),
+            [(store (node (t.VT (load addr:$src1)),
+             t.ImmOperator:$src2, EFLAGS), addr:$src1), (implicit EFLAGS)]>,
+    Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS {
+  let mayStore = 1;
+}
+
+// BinOpMI8 - Instructions that read "[mem], imm8".
+class BinOpMI8<string m, string args, X86TypeInfo t, Format f, dag out>
+  : ITy<0x83, f, t, out, (ins t.MemOperand:$src1, t.Imm8Operand:$src2), m,
+        args, []> {
+  let ImmT = Imm8;
+  let mayLoad = 1;
+}
+// BinOpMI8_F - Instructions that read "[mem], imm8" and write EFLAGS only.
+class BinOpMI8_F<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALU.Folded]>, DefEFLAGS; +// BinOpMI8_R - Instructions that read "[mem], imm8" and write "reg". +class BinOpMI8_R<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>; +// BinOpMI8_RF - Instructions that read "[mem], imm8" and write "reg"/EFLAGS. +class BinOpMI8_RF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>; +// BinOpMI8_M - Instructions that read "[mem], imm8" and write "[mem]". +class BinOpMI8_M<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]> { + let mayStore = 1; +} +// BinOpMI8_MF - Instructions that read "[mem], imm8" and write "[mem]", EFLAGS. +class BinOpMI8_MF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteALURMW]>, DefEFLAGS { + let mayStore = 1; +} +// BinOpMI8F_RF - Instructions that read "[mem], imm8", write "reg" and +// read/write EFLAGS. +class BinOpMI8F_RF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_ndd_args, t, f, (outs t.RegClass:$dst)>, + Sched<[WriteADC.Folded, WriteADC.ReadAfterFold]>, DefEFLAGS, UseEFLAGS, NDD<1>; +// BinOpMI8F_MF - Instructions that read "[mem], imm8", write "[mem]" and +// read/write EFLAGS. +class BinOpMI8F_MF<string m, X86TypeInfo t, Format f> + : BinOpMI8<m, binop_args, t, f, (outs)>, Sched<[WriteADCRMW]>, DefEFLAGS, UseEFLAGS { + let mayStore = 1; +} + +// BinOpAI - Instructions that read "a-reg imm" (Accumulator register). +class BinOpAI<bits<8> o, string m, X86TypeInfo t, Register areg, string args> + : ITy<o, RawFrm, t, (outs), (ins t.ImmOperand:$src), m, args, []>, + Sched<[WriteALU]> { + let ImmT = t.ImmEncoding; + let Uses = [areg]; +} +// BinOpAI_F - Instructions that read "a-reg imm" and write EFLAGS only. +class BinOpAI_F<bits<8> o, string m, X86TypeInfo t, Register areg, string args> + : BinOpAI<o, m, t, areg, args>, DefEFLAGS; + +// BinOpAI_AF - Instructions that read "a-reg imm" and write a-reg/EFLAGS. +class BinOpAI_AF<bits<8> o, string m, X86TypeInfo t, Register areg, + string args> : BinOpAI<o, m, t, areg, args> { + let Defs = [areg, EFLAGS]; +} +// BinOpAIF_AF - Instructions that read "a-reg imm", write a-reg and read/write +// EFLAGS. +class BinOpAIF_AF<bits<8> o, string m, X86TypeInfo t, Register areg, + string args> : BinOpAI<o, m, t, areg, args> { + let Uses = [areg, EFLAGS]; + let Defs = [areg, EFLAGS]; + let SchedRW = [WriteADC]; +} + +// UnaryOpR - Instructions that read "reg". +class UnaryOpR<bits<8> o, Format f, string m, string args, X86TypeInfo t, + dag out, list<dag> p> + : ITy<o, f, t, out, (ins t.RegClass:$src1), m, args, p>, Sched<[WriteALU]>; +// UnaryOpR_R - Instructions that read "reg" and write "reg". +class UnaryOpR_R<bits<8> o, Format f, string m, X86TypeInfo t, + SDPatternOperator node, bit ndd = 0> + : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t, + (outs t.RegClass:$dst), + [(set t.RegClass:$dst, (node t.RegClass:$src1))]>, NDD<ndd>; +// UnaryOpR_RF - Instructions that read "reg" and write "reg"/EFLAGS. 
+class UnaryOpR_RF<bits<8> o, Format f, string m, X86TypeInfo t,
+                   SDPatternOperator node, bit ndd = 0>
+  : UnaryOpR<o, f, m, !if(!eq(ndd, 0), unaryop_args, unaryop_ndd_args), t,
+             (outs t.RegClass:$dst),
+             [(set t.RegClass:$dst, (node t.RegClass:$src1)),
+              (implicit EFLAGS)]>, DefEFLAGS, NDD<ndd>;
+
+// UnaryOpM - Instructions that read "[mem]".
+class UnaryOpM<bits<8> o, Format f, string m, string args, X86TypeInfo t,
+               dag out, list<dag> p>
+  : ITy<o, f, t, out, (ins t.MemOperand:$src1), m, args, p> {
+  let mayLoad = 1;
+}
+// UnaryOpM_R - Instructions that read "[mem]" and write "reg".
+class UnaryOpM_R<bits<8> o, Format f, string m, X86TypeInfo t,
+                 SDPatternOperator node>
+  : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst),
+             [(set t.RegClass:$dst, (node (t.LoadNode addr:$src1)))]>,
+    Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, NDD<1>;
+// UnaryOpM_RF - Instructions that read "[mem]" and write "reg"/EFLAGS.
+class UnaryOpM_RF<bits<8> o, Format f, string m, X86TypeInfo t,
+                  SDPatternOperator node>
+  : UnaryOpM<o, f, m, unaryop_ndd_args, t, (outs t.RegClass:$dst),
+             [(set t.RegClass:$dst, EFLAGS, (node (t.LoadNode addr:$src1)))]>,
+    Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>, DefEFLAGS, NDD<1>;
+// UnaryOpM_M - Instructions that read "[mem]" and write "[mem]".
+class UnaryOpM_M<bits<8> o, Format f, string m, X86TypeInfo t,
+                 SDPatternOperator node>
+  : UnaryOpM<o, f, m, unaryop_args, t, (outs),
+             [(store (node (t.LoadNode addr:$src1)), addr:$src1)]>,
+    Sched<[WriteALURMW]> {
+  let mayStore = 1;
+}
+// UnaryOpM_MF - Instructions that read "[mem]" and write "[mem]"/EFLAGS.
+class UnaryOpM_MF<bits<8> o, Format f, string m, X86TypeInfo t,
+                  SDPatternOperator node>
+  : UnaryOpM<o, f, m, unaryop_args, t, (outs),
+             [(store (node (t.LoadNode addr:$src1)), addr:$src1),
+              (implicit EFLAGS)]>, Sched<[WriteALURMW]>, DefEFLAGS {
+  let mayStore = 1;
+}
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td
index 70bd77bba03a..bbd19cf8d5b2 100644
--- a/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -130,6 +130,9 @@ let Predicates = [HasAVX, NoVLX] in {
defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>;
}
+let Predicates = [HasAVXNECONVERT, NoVLX] in
+  defm : subvec_zero_lowering<"DQA", VR128, v16bf16, v8bf16, sub_xmm>;
+
let Predicates = [HasVLX] in {
defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
@@ -175,6 +178,12 @@ let Predicates = [HasFP16, HasVLX] in {
defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
}
+let Predicates = [HasBF16, HasVLX] in {
+  defm : subvec_zero_lowering<"APSZ128", VR128X, v16bf16, v8bf16, sub_xmm>;
+  defm : subvec_zero_lowering<"APSZ128", VR128X, v32bf16, v8bf16, sub_xmm>;
+  defm : subvec_zero_lowering<"APSZ256", VR256X, v32bf16, v16bf16, sub_ymm>;
+}
+
class maskzeroupper<ValueType vt, RegisterClass RC> :
PatLeaf<(vt RC:$src), [{
return isMaskZeroExtended(N);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 8a04987e768a..49631f38017a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1459,6 +1459,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
Args, CxtI);
}
+InstructionCost
+X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
+                            unsigned Opcode1, const SmallBitVector
&OpcodeMask, + TTI::TargetCostKind CostKind) const { + if (isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) + return TTI::TCC_Basic; + return InstructionCost::getInvalid(); +} + InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, ArrayRef<int> Mask, @@ -3724,10 +3733,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BITREVERSE, MVT::v8i16, { 8, 13, 10, 16 } }, { ISD::BITREVERSE, MVT::v32i8, { 13, 15, 17, 26 } }, // 2 x 128-bit Op + extract/insert { ISD::BITREVERSE, MVT::v16i8, { 7, 7, 9, 13 } }, - { ISD::BSWAP, MVT::v4i64, { 5, 7, 5, 10 } }, - { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 3 } }, - { ISD::BSWAP, MVT::v8i32, { 5, 7, 5, 10 } }, - { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 3 } }, + { ISD::BSWAP, MVT::v4i64, { 5, 6, 5, 10 } }, + { ISD::BSWAP, MVT::v2i64, { 2, 2, 1, 3 } }, + { ISD::BSWAP, MVT::v8i32, { 5, 6, 5, 10 } }, + { ISD::BSWAP, MVT::v4i32, { 2, 2, 1, 3 } }, { ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } }, { ISD::BSWAP, MVT::v8i16, { 2, 2, 1, 3 } }, { ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert @@ -3804,6 +3813,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } }, // sqrtpd }; static const CostKindTblEntry SLMCostTbl[] = { + { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } }, + { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } }, + { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } }, { ISD::FSQRT, MVT::f32, { 20, 20, 1, 1 } }, // sqrtss { ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } }, // sqrtps { ISD::FSQRT, MVT::f64, { 35, 35, 1, 1 } }, // sqrtsd @@ -3842,9 +3854,9 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::BITREVERSE, MVT::v4i32, { 16, 20, 11, 21 } }, { ISD::BITREVERSE, MVT::v8i16, { 16, 20, 11, 21 } }, { ISD::BITREVERSE, MVT::v16i8, { 11, 12, 10, 16 } }, - { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } }, - { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } }, - { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } }, + { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 5 } }, + { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 5 } }, + { ISD::BSWAP, MVT::v8i16, { 2, 3, 1, 5 } }, { ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } }, { ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } }, { ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } }, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 0fa0d240a548..07a3fff4f84b 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -140,6 +140,11 @@ public: TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, ArrayRef<const Value *> Args = ArrayRef<const Value *>(), const Instruction *CxtI = nullptr); + InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, + unsigned Opcode1, + const SmallBitVector &OpcodeMask, + TTI::TargetCostKind CostKind) const; + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, diff --git a/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 1288597fc6b0..05003ec304ad 100644 --- a/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -250,7 +250,7 @@ bool XCoreDAGToDAGISel::tryBRIND(SDNode *N) { SDValue Addr = N->getOperand(1); if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN) return false; - unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue(); + unsigned IntNo = Addr->getConstantOperandVal(1); 
if (IntNo != Intrinsic::xcore_checkevent) return false; SDValue nextAddr = Addr->getOperand(2); diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 7736adab19e8..18feeaadb03c 100644 --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -767,7 +767,7 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, // An index of zero corresponds to the current function's frame address. // An index of one to the parent's frame address, and so on. // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) + if (Op.getConstantOperandVal(0) > 0) return SDValue(); MachineFunction &MF = DAG.getMachineFunction(); @@ -783,7 +783,7 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { // An index of zero corresponds to the current function's return address. // An index of one to the parent's return address, and so on. // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) + if (Op.getConstantOperandVal(0) > 0) return SDValue(); MachineFunction &MF = DAG.getMachineFunction(); @@ -905,7 +905,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { SDValue XCoreTargetLowering:: LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned IntNo = Op.getConstantOperandVal(0); switch (IntNo) { case Intrinsic::xcore_crc8: EVT VT = Op.getValueType(); @@ -1497,7 +1497,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::INTRINSIC_VOID: - switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + switch (N->getConstantOperandVal(1)) { case Intrinsic::xcore_outt: case Intrinsic::xcore_outct: case Intrinsic::xcore_chkct: { @@ -1733,30 +1733,30 @@ void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; case ISD::INTRINSIC_W_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - switch (IntNo) { - case Intrinsic::xcore_getts: - // High bits are known to be zero. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 16); - break; - case Intrinsic::xcore_int: - case Intrinsic::xcore_inct: - // High bits are known to be zero. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 8); - break; - case Intrinsic::xcore_testct: - // Result is either 0 or 1. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 1); - break; - case Intrinsic::xcore_testwct: - // Result is in the range 0 - 4. - Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), - Known.getBitWidth() - 3); - break; - } + unsigned IntNo = Op.getConstantOperandVal(1); + switch (IntNo) { + case Intrinsic::xcore_getts: + // High bits are known to be zero. + Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 16); + break; + case Intrinsic::xcore_int: + case Intrinsic::xcore_inct: + // High bits are known to be zero. + Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 8); + break; + case Intrinsic::xcore_testct: + // Result is either 0 or 1. + Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 1); + break; + case Intrinsic::xcore_testwct: + // Result is in the range 0 - 4. 
+ Known.Zero = + APInt::getHighBitsSet(Known.getBitWidth(), Known.getBitWidth() - 3); + break; + } } break; } diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp index 27d168020ce6..ce640f5b8d45 100644 --- a/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -348,11 +348,7 @@ StringRef ARM::getArchExtName(uint64_t ArchExtKind) { } static bool stripNegationPrefix(StringRef &Name) { - if (Name.starts_with("no")) { - Name = Name.substr(2); - return true; - } - return false; + return Name.consume_front("no"); } StringRef ARM::getArchExtFeature(StringRef ArchExt) { diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 11c5000acc07..2e08c7b12d9d 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1160,7 +1160,7 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 25: CPU = "znver3"; *Type = X86::AMDFAM19H; - if ((Model >= 0x00 && Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || + if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) || (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || (Model >= 0x50 && Model <= 0x5f)) { // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index d475650c2d18..e93502187b54 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -1208,8 +1208,7 @@ static VersionTuple parseVersionFromName(StringRef Name) { VersionTuple Triple::getEnvironmentVersion() const { StringRef EnvironmentName = getEnvironmentName(); StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment()); - if (EnvironmentName.starts_with(EnvironmentTypeName)) - EnvironmentName = EnvironmentName.substr(EnvironmentTypeName.size()); + EnvironmentName.consume_front(EnvironmentTypeName); return parseVersionFromName(EnvironmentName); } diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 085554f18b2b..d46ff07ec734 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -162,8 +162,6 @@ constexpr FeatureBitset FeaturesAlderlake = constexpr FeatureBitset FeaturesSierraforest = FeaturesAlderlake | FeatureCMPCCXADD | FeatureAVXIFMA | FeatureUINTR | FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8; -constexpr FeatureBitset FeaturesGrandridge = - FeaturesSierraforest | FeatureRAOINT; constexpr FeatureBitset FeaturesArrowlakeS = FeaturesSierraforest | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = @@ -369,7 +367,7 @@ constexpr ProcInfo Processors[] = { // Sierraforest microarchitecture based processors. { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Grandridge microarchitecture based processors. - { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesGrandridge, 'p', false }, + { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Granite Rapids microarchitecture based processors. { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false }, // Granite Rapids D microarchitecture based processors. 
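Both stripNegationPrefix above and Triple::getEnvironmentVersion now lean on StringRef::consume_front, which tests for a prefix and strips it in a single call (llvm-ar further down gets the same treatment). A minimal standalone illustration of the semantics, not taken from the patch:

#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  llvm::StringRef Ext = "nodotprod";
  bool Negated = Ext.consume_front("no"); // strips the prefix if present
  assert(Negated && Ext == "dotprod");

  llvm::StringRef Plain = "dotprod";
  assert(!Plain.consume_front("no"));     // no match: returns false,
  assert(Plain == "dotprod");             // string left untouched
  return 0;
}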
diff --git a/llvm/lib/TextAPI/TextStubV5.cpp b/llvm/lib/TextAPI/TextStubV5.cpp index 2f82bc03480b..aea772dbc4be 100644 --- a/llvm/lib/TextAPI/TextStubV5.cpp +++ b/llvm/lib/TextAPI/TextStubV5.cpp @@ -201,8 +201,9 @@ Expected<StubT> getRequiredValue( template <typename JsonT, typename StubT = JsonT> Expected<StubT> getRequiredValue( TBDKey Key, const Object *Obj, - std::function<std::optional<JsonT>(const Object *, StringRef)> GetValue, - StubT DefaultValue, std::function<std::optional<StubT>(JsonT)> Validate) { + std::function<std::optional<JsonT>(const Object *, StringRef)> const + GetValue, + StubT DefaultValue, function_ref<std::optional<StubT>(JsonT)> Validate) { std::optional<JsonT> Val = GetValue(Obj, Keys[Key]); if (!Val) return DefaultValue; @@ -215,7 +216,7 @@ Expected<StubT> getRequiredValue( } Error collectFromArray(TBDKey Key, const Object *Obj, - std::function<void(StringRef)> Append, + function_ref<void(StringRef)> Append, bool IsRequired = false) { const auto *Values = Obj->getArray(Keys[Key]); if (!Values) { diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index f37b4dc938d3..529f7309a1a2 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2951,9 +2951,11 @@ void coro::salvageDebugInfo( // dbg.declare does. if (isa<DbgDeclareInst>(DVI)) { std::optional<BasicBlock::iterator> InsertPt; - if (auto *I = dyn_cast<Instruction>(Storage)) + if (auto *I = dyn_cast<Instruction>(Storage)) { InsertPt = I->getInsertionPointAfterDef(); - else if (isa<Argument>(Storage)) + if (!OptimizeFrame && I->getDebugLoc()) + DVI.setDebugLoc(I->getDebugLoc()); + } else if (isa<Argument>(Storage)) InsertPt = F->getEntryBlock().begin(); if (InsertPt) DVI.moveBefore(*(*InsertPt)->getParent(), *InsertPt); diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 7c277518b21d..7ebf265e17ba 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -76,6 +76,7 @@ STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); +STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef"); STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); STATISTIC(NumNoFree, "Number of functions marked as nofree"); @@ -1279,6 +1280,45 @@ static void addNonNullAttrs(const SCCNodeSet &SCCNodes, } } +/// Deduce noundef attributes for the SCC. +static void addNoUndefAttrs(const SCCNodeSet &SCCNodes, + SmallSet<Function *, 8> &Changed) { + // Check each function in turn, determining which functions return noundef + // values. + for (Function *F : SCCNodes) { + // Already noundef. + if (F->getAttributes().hasRetAttr(Attribute::NoUndef)) + continue; + + // We can infer and propagate function attributes only when we know that the + // definition we'll get at link time is *exactly* the definition we see now. + // For more details, see GlobalValue::mayBeDerefined. + if (!F->hasExactDefinition()) + return; + + // MemorySanitizer assumes that the definition and declaration of a + // function will be consistent. A function with sanitize_memory attribute + // should be skipped from inference. 
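The core of the new addNoUndefAttrs inference (the hunk resumes with the sanitize_memory check below) is a single query: every return value must be provably neither undef nor poison, e.g. a function whose only return is 'ret i32 42'. Distilled into a standalone predicate, as an editorial sketch rather than code from the patch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// True if every 'ret' in F returns a value known not to be undef/poison.
static bool allReturnsAreDefined(const Function &F) {
  return all_of(F, [](const BasicBlock &BB) {
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      if (Value *RV = Ret->getReturnValue()) // null for 'ret void'
        return isGuaranteedNotToBeUndefOrPoison(RV);
    return true; // other terminators don't constrain the result
  });
}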
+ if (F->hasFnAttribute(Attribute::SanitizeMemory)) + continue; + + if (F->getReturnType()->isVoidTy()) + continue; + + if (all_of(*F, [](BasicBlock &BB) { + if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { + // TODO: perform context-sensitive analysis? + return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue()); + } + return true; + })) { + F->addRetAttr(Attribute::NoUndef); + ++NumNoUndefReturn; + Changed.insert(F); + } + } +} + namespace { /// Collects a set of attribute inference requests and performs them all in one @@ -1629,7 +1669,10 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, for (auto &I : BB.instructionsWithoutDebug()) if (auto *CB = dyn_cast<CallBase>(&I)) { Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee == F || !Callee->doesNotRecurse()) + if (!Callee || Callee == F || + (!Callee->doesNotRecurse() && + !(Callee->isDeclaration() && + Callee->hasFnAttribute(Attribute::NoCallback)))) // Function calls a potentially recursive function. return; } @@ -1785,6 +1828,7 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, inferConvergent(Nodes.SCCNodes, Changed); addNoReturnAttrs(Nodes.SCCNodes, Changed); addWillReturn(Nodes.SCCNodes, Changed); + addNoUndefAttrs(Nodes.SCCNodes, Changed); // If we have no external nodes participating in the SCC, we can deduce some // more precise attributes as well. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 719a2678fc18..556fde37efeb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1685,8 +1685,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { assert(NotLHS != nullptr && NotRHS != nullptr && "isFreeToInvert desynced with getFreelyInverted"); Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS); - return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2), - LHSPlusRHS); + return BinaryOperator::CreateSub( + ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS); } } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 63b1e0f64a88..c03f50d75814 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3513,9 +3513,13 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(Op0, C); // ((B | C) & A) | B -> B | (A & C) - if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) + if (match(Op0, m_c_And(m_c_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C)); + // B | ((B | C) & A) -> B | (A & C) + if (match(Op1, m_c_And(m_c_Or(m_Specific(Op0), m_Value(C)), m_Value(A)))) + return BinaryOperator::CreateOr(Op0, Builder.CreateAnd(A, C)); + if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this)) return DeMorgan; @@ -3872,6 +3876,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { } } + // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2 + if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) && + match(Op1, m_APInt(C2))) { + KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I); + if ((KnownX.One & *C2) == *C2) + return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2)); + } + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3b7fe7fa2266..43d4496571be 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3850,6 +3850,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { if (Callee->hasFnAttribute("thunk")) return false; + // If this is a call to a naked function, the assembly might use an + // argument or otherwise rely on the frame layout, so the function + // prototype will mismatch. + if (Callee->hasFnAttribute(Attribute::Naked)) + return false; + // If this is a musttail call, the callee's prototype must match the caller's // prototype with the exception of pointee types. The code below doesn't // implement that, so we can't do this transform. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 289976718e52..3875e59c3ede 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -111,8 +111,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() || - GV->getValueType() != GEP->getSourceElementType() || - !GV->isConstant() || !GV->hasDefinitiveInitializer()) + GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() || + !GV->hasDefinitiveInitializer()) return nullptr; Constant *Init = GV->getInitializer(); @@ -128,8 +128,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( // the simple index into a single-dimensional array. // // Require: GEP GV, 0, i {{, constant indices}} - if (GEP->getNumOperands() < 3 || - !isa<ConstantInt>(GEP->getOperand(1)) || + if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) || !cast<ConstantInt>(GEP->getOperand(1))->isZero() || isa<Constant>(GEP->getOperand(2))) return nullptr; @@ -142,15 +141,18 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!Idx) return nullptr; // Variable index. + if (!Idx) + return nullptr; // Variable index. uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. + if ((unsigned)IdxVal != IdxVal) + return nullptr; // Too large array index. if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return nullptr; + if (IdxVal >= ATy->getNumElements()) + return nullptr; EltTy = ATy->getElementType(); } else { return nullptr; // Unknown type. @@ -191,7 +193,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { Constant *Elt = Init->getAggregateElement(i); - if (!Elt) return nullptr; + if (!Elt) + return nullptr; // If this is indexing an array of structures, get the structure element. 
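For orientation while reading the reindented hunks that continue below: foldCmpLoadFromIndexedGlobal turns a compare of a load from a constant global array into direct compares on the index, tracking single-element, two-element, and range "state machines" over the table contents. A worked source-level analogue (illustrative only; the table and function names are invented):

// A constant table and a compare against one of its values:
static const int Tbl[4] = {10, 20, 20, 30};

bool viaLoad(unsigned i) { return Tbl[i] == 20; }

// The true elements form the contiguous range [1, 2], so the fold can
// emit the range form instead of loading from the table:
bool viaIndex(unsigned i) { return i - 1u < 2u; } // true exactly for i in {1, 2}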
if (!LaterIndices.empty()) { @@ -214,16 +217,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( if (isa<UndefValue>(C)) { // Extend range state machines to cover this element in case there is an // undef in the middle of the range. - if (TrueRangeEnd == (int)i-1) + if (TrueRangeEnd == (int)i - 1) TrueRangeEnd = i; - if (FalseRangeEnd == (int)i-1) + if (FalseRangeEnd == (int)i - 1) FalseRangeEnd = i; continue; } // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. - if (!isa<ConstantInt>(C)) return nullptr; + if (!isa<ConstantInt>(C)) + return nullptr; // Otherwise, we know if the comparison is true or false for this element, // update our state machines. @@ -233,7 +237,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( if (IsTrueForElt) { // Update the TrueElement state machine. if (FirstTrueElement == Undefined) - FirstTrueElement = TrueRangeEnd = i; // First true element. + FirstTrueElement = TrueRangeEnd = i; // First true element. else { // Update double-compare state machine. if (SecondTrueElement == Undefined) @@ -242,7 +246,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( SecondTrueElement = Overdefined; // Update range state machine. - if (TrueRangeEnd == (int)i-1) + if (TrueRangeEnd == (int)i - 1) TrueRangeEnd = i; else TrueRangeEnd = Overdefined; @@ -259,7 +263,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( SecondFalseElement = Overdefined; // Update range state machine. - if (FalseRangeEnd == (int)i-1) + if (FalseRangeEnd == (int)i - 1) FalseRangeEnd = i; else FalseRangeEnd = Overdefined; @@ -348,7 +352,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( // False for two elements -> 'i != 47 & i != 72'. Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); - Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); + Value *SecondFalseIdx = + ConstantInt::get(Idx->getType(), SecondFalseElement); Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } @@ -365,8 +370,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - TrueRangeEnd-FirstTrueElement+1); + Value *End = + ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1); return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End); } @@ -380,8 +385,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal( Idx = Builder.CreateAdd(Idx, Offs); } - Value *End = ConstantInt::get(Idx->getType(), - FalseRangeEnd-FirstFalseElement); + Value *End = + ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement); return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End); } @@ -4624,27 +4629,35 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, } bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; - if (BO0 && isa<OverflowingBinaryOperator>(BO0)) - NoOp0WrapProblem = - ICmpInst::isEquality(Pred) || - (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap()); - if (BO1 && isa<OverflowingBinaryOperator>(BO1)) - NoOp1WrapProblem = - ICmpInst::isEquality(Pred) || - (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) || - (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); - + bool Op0HasNUW = false, Op1HasNUW = false; + bool Op0HasNSW = false, Op1HasNSW = false; // Analyze the case when either Op0 or Op1 is an add instruction. 
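The refactor that follows (the hasNoWrapProblem lambda and the m_AddLike matcher) rests on a bit-level fact: when two values share no set bits, their "or" is exactly their sum and cannot wrap in either the unsigned or the signed sense, so such an "or" can be treated as an add carrying both nuw and nsw. A standalone check of the identity, not from the patch:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0b10100000, Y = 0b00000101; // no common set bits
  assert((X & Y) == 0);
  assert((X | Y) == X + Y); // disjoint or behaves as a carry-free add
  return 0;
}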
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). + auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred, + bool &HasNSW, bool &HasNUW) -> bool { + if (isa<OverflowingBinaryOperator>(BO)) { + HasNUW = BO.hasNoUnsignedWrap(); + HasNSW = BO.hasNoSignedWrap(); + return ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && HasNUW) || + (CmpInst::isSigned(Pred) && HasNSW); + } else if (BO.getOpcode() == Instruction::Or) { + HasNUW = true; + HasNSW = true; + return true; + } else { + return false; + } + }; Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; - if (BO0 && BO0->getOpcode() == Instruction::Add) { - A = BO0->getOperand(0); - B = BO0->getOperand(1); + + if (BO0) { + match(BO0, m_AddLike(m_Value(A), m_Value(B))); + NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW); } - if (BO1 && BO1->getOpcode() == Instruction::Add) { - C = BO1->getOperand(0); - D = BO1->getOperand(1); + if (BO1) { + match(BO1, m_AddLike(m_Value(C), m_Value(D))); + NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW); } // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow. @@ -4764,17 +4777,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, APInt AP2Abs = AP2->abs(); if (AP1Abs.uge(AP2Abs)) { APInt Diff = *AP1 - *AP2; - bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1); - bool HasNSW = BO0->hasNoSignedWrap(); Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); - Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW); + Value *NewAdd = Builder.CreateAdd( + A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW); return new ICmpInst(Pred, NewAdd, C); } else { APInt Diff = *AP2 - *AP1; - bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2); - bool HasNSW = BO1->hasNoSignedWrap(); Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff); - Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW); + Value *NewAdd = Builder.CreateAdd( + C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW); return new ICmpInst(Pred, A, NewAdd); } } @@ -4868,16 +4879,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); // if Z != 0 and nsw(X * Z) and nsw(Y * Z) // X * Z eq/ne Y * Z -> X eq/ne Y - if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() && - BO1->hasNoSignedWrap()) + if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW) return new ICmpInst(Pred, X, Y); } else NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); // If Z != 0 and nuw(X * Z) and nuw(Y * Z) // X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y - if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() && - BO1->hasNoUnsignedWrap()) + if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW) return new ICmpInst(Pred, X, Y); } } @@ -4966,7 +4975,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); case Instruction::SDiv: - if (!I.isEquality() || !BO0->isExact() || !BO1->isExact()) + if (!(I.isEquality() || match(BO0->getOperand(1), m_NonNegative())) || + !BO0->isExact() || !BO1->isExact()) break; return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); @@ -4976,8 +4986,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); case Instruction::Shl: { - bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); - bool NSW = BO0->hasNoSignedWrap() && 
BO1->hasNoSignedWrap(); + bool NUW = Op0HasNUW && Op1HasNUW; + bool NSW = Op0HasNSW && Op1HasNSW; if (!NUW && !NSW) break; if (!NSW && I.isSigned()) @@ -5029,10 +5039,10 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, } /// Fold icmp Pred min|max(X, Y), Z. -Instruction * -InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I, - MinMaxIntrinsic *MinMax, Value *Z, - ICmpInst::Predicate Pred) { +Instruction *InstCombinerImpl::foldICmpWithMinMax(Instruction &I, + MinMaxIntrinsic *MinMax, + Value *Z, + ICmpInst::Predicate Pred) { Value *X = MinMax->getLHS(); Value *Y = MinMax->getRHS(); if (ICmpInst::isSigned(Pred) && !MinMax->isSigned()) @@ -5161,24 +5171,6 @@ InstCombinerImpl::foldICmpWithMinMaxImpl(Instruction &I, return nullptr; } -Instruction *InstCombinerImpl::foldICmpWithMinMax(ICmpInst &Cmp) { - ICmpInst::Predicate Pred = Cmp.getPredicate(); - Value *Lhs = Cmp.getOperand(0); - Value *Rhs = Cmp.getOperand(1); - - if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Lhs)) { - if (Instruction *Res = foldICmpWithMinMaxImpl(Cmp, MinMax, Rhs, Pred)) - return Res; - } - - if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(Rhs)) { - if (Instruction *Res = foldICmpWithMinMaxImpl( - Cmp, MinMax, Lhs, ICmpInst::getSwappedPredicate(Pred))) - return Res; - } - - return nullptr; -} // Canonicalize checking for a power-of-2-or-zero value: static Instruction *foldICmpPow2Test(ICmpInst &I, @@ -6843,6 +6835,34 @@ static Instruction *foldReductionIdiom(ICmpInst &I, return nullptr; } +// This helper will be called with icmp operands in both orders. +Instruction *InstCombinerImpl::foldICmpCommutative(ICmpInst::Predicate Pred, + Value *Op0, Value *Op1, + ICmpInst &CxtI) { + // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. + if (auto *GEP = dyn_cast<GEPOperator>(Op0)) + if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI)) + return NI; + + if (auto *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI)) + return NI; + + if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op0)) + if (Instruction *Res = foldICmpWithMinMax(CxtI, MinMax, Op1, Pred)) + return Res; + + { + Value *X; + const APInt *C; + // icmp X+Cst, X + if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) + return foldICmpAddOpConst(X, *C, Pred); + } + + return nullptr; +} + Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { bool Changed = false; const SimplifyQuery Q = SQ.getWithInstruction(&I); @@ -6966,20 +6986,11 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *Res = foldICmpInstWithConstantNotInt(I)) return Res; - // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'. - if (auto *GEP = dyn_cast<GEPOperator>(Op0)) - if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I)) - return NI; - if (auto *GEP = dyn_cast<GEPOperator>(Op1)) - if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I)) - return NI; - - if (auto *SI = dyn_cast<SelectInst>(Op0)) - if (Instruction *NI = foldSelectICmp(I.getPredicate(), SI, Op1, I)) - return NI; - if (auto *SI = dyn_cast<SelectInst>(Op1)) - if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I)) - return NI; + if (Instruction *Res = foldICmpCommutative(I.getPredicate(), Op0, Op1, I)) + return Res; + if (Instruction *Res = + foldICmpCommutative(I.getSwappedPredicate(), Op1, Op0, I)) + return Res; // In case of a comparison with two select instructions having the same // condition, check whether one of the resulting branches can be simplified. 
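The new foldICmpCommutative driver above removes per-fold operand-order boilerplate: visitICmpInst calls it once as written and once with the operands swapped and the predicate reversed, since 'icmp pred A, B' and 'icmp swapped(pred) B, A' state the same fact. A toy standalone model of that equivalence (not LLVM code):

#include <cassert>

enum Pred { SLT, SGT };
static Pred swapped(Pred P) { return P == SLT ? SGT : SLT; }
static bool icmp(Pred P, int A, int B) { return P == SLT ? A < B : A > B; }

int main() {
  // icmp slt 3, 5  states the same fact as  icmp sgt 5, 3
  assert(icmp(SLT, 3, 5) == icmp(swapped(SLT), 5, 3));
  assert(icmp(SGT, 7, 2) == icmp(swapped(SGT), 2, 7));
  return 0;
}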
@@ -7030,9 +7041,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { if (Instruction *R = foldICmpWithCastOp(I)) return R; - if (Instruction *Res = foldICmpWithMinMax(I)) - return Res; - { Value *X, *Y; // Transform (X & ~Y) == 0 --> (X & Y) != 0 @@ -7134,18 +7142,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) { !ACXI->isWeak()) return ExtractValueInst::Create(ACXI, 1); - { - Value *X; - const APInt *C; - // icmp X+Cst, X - if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X) - return foldICmpAddOpConst(X, *C, I.getPredicate()); - - // icmp X, X+Cst - if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X) - return foldICmpAddOpConst(X, *C, I.getSwappedPredicate()); - } - if (Instruction *Res = foldICmpWithHighBitMask(I, Builder)) return Res; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 9e76a0cf17b1..bdaf7550b4b4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -648,9 +648,8 @@ public: Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp, const APInt &C); Instruction *foldICmpBinOp(ICmpInst &Cmp, const SimplifyQuery &SQ); - Instruction *foldICmpWithMinMaxImpl(Instruction &I, MinMaxIntrinsic *MinMax, - Value *Z, ICmpInst::Predicate Pred); - Instruction *foldICmpWithMinMax(ICmpInst &Cmp); + Instruction *foldICmpWithMinMax(Instruction &I, MinMaxIntrinsic *MinMax, + Value *Z, ICmpInst::Predicate Pred); Instruction *foldICmpEquality(ICmpInst &Cmp); Instruction *foldIRemByPowerOfTwoToBitTest(ICmpInst &I); Instruction *foldSignBitTest(ICmpInst &I); @@ -708,6 +707,8 @@ public: const APInt &C); Instruction *foldICmpBitCast(ICmpInst &Cmp); Instruction *foldICmpWithTrunc(ICmpInst &Cmp); + Instruction *foldICmpCommutative(ICmpInst::Predicate Pred, Value *Op0, + Value *Op1, ICmpInst &CxtI); // Helpers of visitSelectInst(). Instruction *foldSelectOfBools(SelectInst &SI); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 20bf00344b14..ab55f235920a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1171,14 +1171,15 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, return nullptr; } -static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { +static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal, + InstCombinerImpl &IC) { Value *LHS, *RHS; // TODO: What to do with pointer min/max patterns? 
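canonicalizeSPF, reworked below to take a decomposed compare plus select arms instead of a whole SelectInst, recognizes "select pattern flavors": compare/select pairs that spell min, max, or abs. In source terms (illustrative C++, not from the patch; the pass rewrites the IR equivalents into intrinsic calls):

// Each of these select patterns has an intrinsic canonical form:
static int sminLike(int X, int Y) { return X < Y ? X : Y; }  // llvm.smin.i32
static int smaxLike(int X, int Y) { return X > Y ? X : Y; }  // llvm.smax.i32
static int absLike(int X)         { return X < 0 ? -X : X; } // llvm.abs.i32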
- if (!Sel.getType()->isIntOrIntVectorTy()) + if (!TrueVal->getType()->isIntOrIntVectorTy()) return nullptr; - SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor; + SelectPatternFlavor SPF = + matchDecomposedSelectPattern(&Cmp, TrueVal, FalseVal, LHS, RHS).Flavor; if (SPF == SelectPatternFlavor::SPF_ABS || SPF == SelectPatternFlavor::SPF_NABS) { if (!Cmp.hasOneUse() && !RHS->hasOneUse()) @@ -1188,13 +1189,13 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && match(RHS, m_NSWNeg(m_Specific(LHS))); Constant *IntMinIsPoisonC = - ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); + ConstantInt::get(Type::getInt1Ty(Cmp.getContext()), IntMinIsPoison); Instruction *Abs = IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); if (SPF == SelectPatternFlavor::SPF_NABS) - return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! - return IC.replaceInstUsesWith(Sel, Abs); + return IC.Builder.CreateNeg(Abs); // Always without NSW flag! + return Abs; } if (SelectPatternResult::isMinOrMax(SPF)) { @@ -1215,8 +1216,7 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, default: llvm_unreachable("Unexpected SPF"); } - return IC.replaceInstUsesWith( - Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS)); + return IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS); } return nullptr; @@ -1677,8 +1677,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) return NewSel; - if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this)) - return NewSPF; + if (Value *V = + canonicalizeSPF(*ICI, SI.getTrueValue(), SI.getFalseValue(), *this)) + return replaceInstUsesWith(SI, V); if (Value *V = foldSelectInstWithICmpConst(SI, ICI, Builder)) return replaceInstUsesWith(SI, V); @@ -2363,6 +2364,9 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, Value *FVal = Sel.getFalseValue(); Type *SelType = Sel.getType(); + if (ICmpInst::makeCmpResultType(TVal->getType()) != Cond->getType()) + return nullptr; + // Match select ?, TC, FC where the constants are equal but negated. // TODO: Generalize to handle a negated variable operand? const APFloat *TC, *FC; @@ -3790,5 +3794,50 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *I = foldBitCeil(SI, Builder)) return I; + // Fold: + // (select A && B, T, F) -> (select A, (select B, T, F), F) + // (select A || B, T, F) -> (select A, T, (select B, T, F)) + // if (select B, T, F) is foldable. + // TODO: preserve FMF flags + auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A, + Value *B) -> Instruction * { + if (Value *V = simplifySelectInst(B, TrueVal, FalseVal, + SQ.getWithInstruction(&SI))) + return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V); + + // Is (select B, T, F) a SPF? + if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) { + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(B)) + if (Value *V = canonicalizeSPF(*Cmp, TrueVal, FalseVal, *this)) + return SelectInst::Create(A, IsAnd ? V : TrueVal, + IsAnd ? 
FalseVal : V); + } + + return nullptr; + }; + + Value *LHS, *RHS; + if (match(CondVal, m_And(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS)) + return I; + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, RHS, LHS)) + return I; + } else if (match(CondVal, m_Or(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS)) + return I; + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, RHS, LHS)) + return I; + } else { + // We cannot swap the operands of logical and/or. + // TODO: Can we swap the operands by inserting a freeze? + if (match(CondVal, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS)) + return I; + } else if (match(CondVal, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) { + if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS)) + return I; + } + } + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 7f5a7b666903..351fc3b0174f 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2469,31 +2469,43 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { DL.getIndexSizeInBits(AS)) { uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue(); - bool Matched = false; - uint64_t C; - Value *V = nullptr; if (TyAllocSize == 1) { - V = GEP.getOperand(1); - Matched = true; - } else if (match(GEP.getOperand(1), - m_AShr(m_Value(V), m_ConstantInt(C)))) { - if (TyAllocSize == 1ULL << C) - Matched = true; - } else if (match(GEP.getOperand(1), - m_SDiv(m_Value(V), m_ConstantInt(C)))) { - if (TyAllocSize == C) - Matched = true; + // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), + // but only if the result pointer is only used as if it were an integer, + // or both point to the same underlying object (otherwise provenance is + // not necessarily retained). + Value *X = GEP.getPointerOperand(); + Value *Y; + if (match(GEP.getOperand(1), + m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && + GEPType == Y->getType()) { + bool HasSameUnderlyingObject = + getUnderlyingObject(X) == getUnderlyingObject(Y); + bool Changed = false; + GEP.replaceUsesWithIf(Y, [&](Use &U) { + bool ShouldReplace = HasSameUnderlyingObject || + isa<ICmpInst>(U.getUser()) || + isa<PtrToIntInst>(U.getUser()); + Changed |= ShouldReplace; + return ShouldReplace; + }); + return Changed ? &GEP : nullptr; + } + } else { + // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) + Value *V; + if ((has_single_bit(TyAllocSize) && + match(GEP.getOperand(1), + m_Exact(m_AShr(m_Value(V), + m_SpecificInt(countr_zero(TyAllocSize)))))) || + match(GEP.getOperand(1), + m_Exact(m_SDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) { + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + Builder.getInt8Ty(), GEP.getPointerOperand(), V); + NewGEP->setIsInBounds(GEP.isInBounds()); + return NewGEP; + } } - - // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), but - // only if both point to the same underlying object (otherwise provenance - // is not necessarily retained). 
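The rewritten GEP canonicalization above hinges on an address identity: offsetting X by (ptrtoint Y - ptrtoint X) bytes computes exactly Y's address, so the arithmetic is never in question; only pointer provenance is, which is why the new code restricts the replacement to icmp/ptrtoint users unless both pointers share an underlying object. The identity itself, as a standalone check:

#include <cassert>
#include <cstdint>

int main() {
  char Buf[16];
  char *X = Buf, *Y = Buf + 7;                  // same underlying object
  uintptr_t Off = (uintptr_t)Y - (uintptr_t)X;  // (ptrtoint Y)-(ptrtoint X)
  assert(X + Off == Y);                         // gep i8, X, Off lands on Y
  return 0;
}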
- Value *Y; - Value *X = GEP.getOperand(0); - if (Matched && - match(V, m_Sub(m_PtrToInt(m_Value(Y)), m_PtrToInt(m_Specific(X)))) && - getUnderlyingObject(X) == getUnderlyingObject(Y)) - return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType); } } // We do not handle pointer-vector geps here. diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 899d7e0a11e6..06c87bd6dc37 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -366,6 +366,13 @@ struct Decomposition { append_range(Vars, Other.Vars); } + void sub(const Decomposition &Other) { + Decomposition Tmp = Other; + Tmp.mul(-1); + add(Tmp.Offset); + append_range(Vars, Tmp.Vars); + } + void mul(int64_t Factor) { Offset = multiplyWithOverflow(Offset, Factor); for (auto &Var : Vars) @@ -569,10 +576,12 @@ static Decomposition decompose(Value *V, return Result; } - if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) - return {-1 * CI->getSExtValue(), {{1, Op0}}}; - if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) - return {0, {{1, Op0}, {-1, Op1}}}; + if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1)))) { + auto ResA = decompose(Op0, Preconditions, IsSigned, DL); + auto ResB = decompose(Op1, Preconditions, IsSigned, DL); + ResA.sub(ResB); + return ResA; + } return {V, IsKnownNonNegative}; } @@ -1010,22 +1019,14 @@ void State::addInfoFor(BasicBlock &BB) { continue; } - if (match(&I, m_Intrinsic<Intrinsic::ssub_with_overflow>())) { - WorkList.push_back( - FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I))); - continue; - } - - if (isa<MinMaxIntrinsic>(&I)) { - WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I)); - continue; - } - - Value *A, *B; - CmpInst::Predicate Pred; - // For now, just handle assumes with a single compare as condition. - if (match(&I, m_Intrinsic<Intrinsic::assume>( - m_ICmp(Pred, m_Value(A), m_Value(B))))) { + auto *II = dyn_cast<IntrinsicInst>(&I); + Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic; + switch (ID) { + case Intrinsic::assume: { + Value *A, *B; + CmpInst::Predicate Pred; + if (!match(I.getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B)))) + break; if (GuaranteedToExecute) { // The assume is guaranteed to execute when BB is entered, hence Cond // holds on entry to BB. @@ -1035,7 +1036,23 @@ void State::addInfoFor(BasicBlock &BB) { WorkList.emplace_back( FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I)); } + break; + } + // Enqueue ssub_with_overflow for simplification. + case Intrinsic::ssub_with_overflow: + WorkList.push_back( + FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I))); + break; + // Enqueue the intrinsics to add extra info. + case Intrinsic::abs: + case Intrinsic::umin: + case Intrinsic::umax: + case Intrinsic::smin: + case Intrinsic::smax: + WorkList.push_back(FactOrCheck::getInstFact(DT.getNode(&BB), &I)); + break; } + GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I); } @@ -1693,6 +1710,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, ICmpInst::Predicate Pred; if (!CB.isConditionFact()) { + Value *X; + if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) { + // TODO: Add CB.Inst >= 0 fact. 
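The constraint recorded on the next line is abs(X) s>= X for llvm.abs (the TODO notes that abs(X) s>= 0 could be added as well). Both facts hold for every input apart from the INT_MIN corner that the intrinsic's int_min_is_poison flag governs; a quick standalone check, not patch code:

#include <cassert>
#include <cstdlib>

int main() {
  for (int X : {-7, 0, 42}) {
    assert(std::abs(X) >= X); // the fact AddFact records: abs(X) s>= X
    assert(std::abs(X) >= 0); // the TODO'd fact (INT_MIN aside)
  }
  return 0;
}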
+ AddFact(CmpInst::ICMP_SGE, CB.Inst, X); + continue; + } + if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) { Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate()); AddFact(Pred, MinMax, MinMax->getLHS()); diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp index fb4d82885377..282c44563466 100644 --- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp +++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp @@ -29,9 +29,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -66,19 +67,6 @@ class CanonicalizeFreezeInLoopsImpl { ScalarEvolution &SE; DominatorTree &DT; - struct FrozenIndPHIInfo { - // A freeze instruction that uses an induction phi - FreezeInst *FI = nullptr; - // The induction phi, step instruction, the operand idx of StepInst which is - // a step value - PHINode *PHI; - BinaryOperator *StepInst; - unsigned StepValIdx = 0; - - FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst) - : PHI(PHI), StepInst(StepInst) {} - }; - // Can freeze instruction be pushed into operands of I? // In order to do this, I should not create a poison after I's flags are // stripped. @@ -99,6 +87,46 @@ public: } // anonymous namespace +namespace llvm { + +struct FrozenIndPHIInfo { + // A freeze instruction that uses an induction phi + FreezeInst *FI = nullptr; + // The induction phi, step instruction, the operand idx of StepInst which is + // a step value + PHINode *PHI; + BinaryOperator *StepInst; + unsigned StepValIdx = 0; + + FrozenIndPHIInfo(PHINode *PHI, BinaryOperator *StepInst) + : PHI(PHI), StepInst(StepInst) {} + + bool operator==(const FrozenIndPHIInfo &Other) { return FI == Other.FI; } +}; + +template <> struct DenseMapInfo<FrozenIndPHIInfo> { + static inline FrozenIndPHIInfo getEmptyKey() { + return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getEmptyKey(), + DenseMapInfo<BinaryOperator *>::getEmptyKey()); + } + + static inline FrozenIndPHIInfo getTombstoneKey() { + return FrozenIndPHIInfo(DenseMapInfo<PHINode *>::getTombstoneKey(), + DenseMapInfo<BinaryOperator *>::getTombstoneKey()); + } + + static unsigned getHashValue(const FrozenIndPHIInfo &Val) { + return DenseMapInfo<FreezeInst *>::getHashValue(Val.FI); + }; + + static bool isEqual(const FrozenIndPHIInfo &LHS, + const FrozenIndPHIInfo &RHS) { + return LHS.FI == RHS.FI; + }; +}; + +} // end namespace llvm + // Given U = (value, user), replace value with freeze(value), and let // SCEV forget user. The inserted freeze is placed in the preheader. 
void CanonicalizeFreezeInLoopsImpl::InsertFreezeAndForgetFromSCEV(Use &U) { @@ -126,7 +154,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { if (!L->isLoopSimplifyForm()) return false; - SmallVector<FrozenIndPHIInfo, 4> Candidates; + SmallSetVector<FrozenIndPHIInfo, 4> Candidates; for (auto &PHI : L->getHeader()->phis()) { InductionDescriptor ID; @@ -155,7 +183,7 @@ bool CanonicalizeFreezeInLoopsImpl::run() { if (auto *FI = dyn_cast<FreezeInst>(U)) { LLVM_DEBUG(dbgs() << "canonfr: found: " << *FI << "\n"); Info.FI = FI; - Candidates.push_back(Info); + Candidates.insert(Info); } }; for_each(PHI.users(), Visit); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index a758fb306982..c76cc9db16d7 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3593,8 +3593,9 @@ DIExpression *llvm::getExpressionForConstant(DIBuilder &DIB, const Constant &C, if (isa<ConstantInt>(C)) return createIntegerExpression(C); - if (Ty.isFloatTy() || Ty.isDoubleTy()) { - const APFloat &APF = cast<ConstantFP>(&C)->getValueAPF(); + auto *FP = dyn_cast<ConstantFP>(&C); + if (FP && (Ty.isFloatTy() || Ty.isDoubleTy())) { + const APFloat &APF = FP->getValueAPF(); return DIB.createConstantValueExpression( APF.bitcastToAPInt().getZExtValue()); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f82e161fb846..8e135d80f4f2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8174,13 +8174,20 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, bool Consecutive = Reverse || Decision == LoopVectorizationCostModel::CM_Widen; + VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1]; + if (Consecutive) { + auto *VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), + Reverse, I->getDebugLoc()); + Builder.getInsertBlock()->appendRecipe(VectorPtr); + Ptr = VectorPtr; + } if (LoadInst *Load = dyn_cast<LoadInst>(I)) - return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask, - Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive, + Reverse); StoreInst *Store = cast<StoreInst>(I); - return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0], - Mask, Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask, + Consecutive, Reverse); } /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also @@ -9475,8 +9482,8 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF); bool isMaskRequired = getMask(); if (isMaskRequired) { - // Mask reversal is only neede for non-all-one (null) masks, as reverse of a - // null all-one mask is a null mask. + // Mask reversal is only needed for non-all-one (null) masks, as reverse of + // a null all-one mask is a null mask. for (unsigned Part = 0; Part < State.UF; ++Part) { Value *Mask = State.get(getMask(), Part); if (isReverse()) @@ -9485,44 +9492,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { } } - const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * { - // Calculate the pointer for the specific unroll-part. - Value *PartPtr = nullptr; - - // Use i32 for the gep index type when the value is constant, - // or query DataLayout for a more suitable index type otherwise. 
- const DataLayout &DL = - Builder.GetInsertBlock()->getModule()->getDataLayout(); - Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0) - ? DL.getIndexType(PointerType::getUnqual( - ScalarDataTy->getContext())) - : Builder.getInt32Ty(); - bool InBounds = false; - if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) - InBounds = gep->isInBounds(); - if (isReverse()) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - // RunTimeVF = VScale * VF.getKnownMinValue() - // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() - Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); - // NumElt = -Part * RunTimeVF - Value *NumElt = - Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); - // LastLane = 1 - RunTimeVF - Value *LastLane = - Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); - PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds); - PartPtr = - Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds); - } else { - Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); - PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds); - } - - return PartPtr; - }; - // Handle Stores: if (SI) { State.setDebugLocFrom(SI->getDebugLoc()); @@ -9543,8 +9512,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // We don't want to update the value in the map as it might be used in // another expression. So don't call resetVectorValue(StoredVal). } - auto *VecPtr = - CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + auto *VecPtr = State.get(getAddr(), Part); if (isMaskRequired) NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, BlockInMaskParts[Part]); @@ -9568,8 +9536,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { nullptr, "wide.masked.gather"); State.addMetadata(NewLI, LI); } else { - auto *VecPtr = - CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + auto *VecPtr = State.get(getAddr(), Part); if (isMaskRequired) NewLI = Builder.CreateMaskedLoad( DataTy, VecPtr, Alignment, BlockInMaskParts[Part], diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 32913b3f5569..304991526064 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -4925,36 +4925,34 @@ void BoUpSLP::buildExternalUses( LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); Instruction *UserInst = dyn_cast<Instruction>(U); - if (!UserInst) + if (!UserInst || isDeleted(UserInst)) continue; - if (isDeleted(UserInst)) + // Ignore users in the user ignore list. + if (UserIgnoreList && UserIgnoreList->contains(UserInst)) continue; // Skip in-tree scalars that become vectors if (TreeEntry *UseEntry = getTreeEntry(U)) { - Value *UseScalar = UseEntry->Scalars[0]; // Some in-tree scalars will remain as scalar in vectorized - // instructions. If that is the case, the one in Lane 0 will + // instructions. If that is the case, the one in FoundLane will // be used. 
- if (UseScalar != U || - UseEntry->State == TreeEntry::ScatterVectorize || + if (UseEntry->State == TreeEntry::ScatterVectorize || UseEntry->State == TreeEntry::PossibleStridedVectorize || - !doesInTreeUserNeedToExtract(Scalar, UserInst, TLI)) { + !doesInTreeUserNeedToExtract( + Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) { LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U << ".\n"); assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state"); continue; } + U = nullptr; } - // Ignore users in the user ignore list. - if (UserIgnoreList && UserIgnoreList->contains(UserInst)) - continue; - - LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " - << Lane << " from " << *Scalar << ".\n"); - ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane)); + LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst + << " from lane " << Lane << " from " << *Scalar + << ".\n"); + ExternalUses.emplace_back(Scalar, U, FoundLane); } } } @@ -8384,6 +8382,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, (void)E; return TTI->getInstructionCost(VI, CostKind); }; + // FIXME: Workaround for syntax error reported by MSVC buildbots. + TargetTransformInfo &TTIRef = *TTI; // Need to clear CommonCost since the final shuffle cost is included into // vector cost. auto GetVectorCost = [&](InstructionCost) { @@ -8398,14 +8398,15 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, // No need to add new vector costs here since we're going to reuse // same main/alternate vector ops, just do different shuffling. } else if (Instruction::isBinaryOp(E->getOpcode())) { - VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); + VecCost = + TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); VecCost += - TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); + TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) { auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size()); - VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, - CI0->getPredicate(), CostKind, VL0); - VecCost += TTI->getCmpSelInstrCost( + VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, + CI0->getPredicate(), CostKind, VL0); + VecCost += TTIRef.getCmpSelInstrCost( E->getOpcode(), VecTy, MaskTy, cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind, E->getAltOp()); @@ -8414,10 +8415,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size()); auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size()); - VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, - TTI::CastContextHint::None, CostKind); - VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, - TTI::CastContextHint::None, CostKind); + VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, + TTI::CastContextHint::None, CostKind); + VecCost += + TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, + TTI::CastContextHint::None, CostKind); } SmallVector<int> Mask; E->buildAltOpShuffleMask( @@ -8426,8 +8428,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals, return I->getOpcode() == E->getAltOpcode(); }, Mask); - VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - FinalVecTy, Mask); + VecCost += 
TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + FinalVecTy, Mask); + // Patterns like [fadd,fsub] can be combined into a single instruction + // in x86. Reordering them into [fsub,fadd] blocks this pattern. So we + // need to take into account their order when looking for the most used + // order. + unsigned Opcode0 = E->getOpcode(); + unsigned Opcode1 = E->getAltOpcode(); + // The opcode mask selects between the two opcodes. + SmallBitVector OpcodeMask(E->Scalars.size(), false); + for (unsigned Lane : seq<unsigned>(0, E->Scalars.size())) + if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1) + OpcodeMask.set(Lane); + // If this pattern is supported by the target then we consider the + // order. + if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) { + InstructionCost AltVecCost = TTIRef.getAltInstrCost( + VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); + return AltVecCost < VecCost ? AltVecCost : VecCost; + } + // TODO: Check the reverse order too. return VecCost; }; return GetCostDiff(GetScalarCost, GetVectorCost); @@ -11493,17 +11514,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *PO = LI->getPointerOperand(); if (E->State == TreeEntry::Vectorize) { NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign()); - - // The pointer operand uses an in-tree scalar so we add the new - // LoadInst to ExternalUses list to make sure that an extract will - // be generated in the future. - if (isa<Instruction>(PO)) { - if (TreeEntry *Entry = getTreeEntry(PO)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(PO); - ExternalUses.emplace_back(PO, NewLI, FoundLane); - } - } } else { assert((E->State == TreeEntry::ScatterVectorize || E->State == TreeEntry::PossibleStridedVectorize) && @@ -11539,17 +11549,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { StoreInst *ST = Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign()); - // The pointer operand uses an in-tree scalar, so add the new StoreInst to - // ExternalUses to make sure that an extract will be generated in the - // future. - if (isa<Instruction>(Ptr)) { - if (TreeEntry *Entry = getTreeEntry(Ptr)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(Ptr); - ExternalUses.push_back(ExternalUser(Ptr, ST, FoundLane)); - } - } - Value *V = propagateMetadata(ST, E->Scalars); E->VectorizedValue = V; @@ -11597,10 +11596,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CallInst *CI = cast<CallInst>(VL0); setInsertPointAfterBundle(E); - Intrinsic::ID IID = Intrinsic::not_intrinsic; - if (Function *FI = CI->getCalledFunction()) - IID = FI->getIntrinsicID(); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI); @@ -11611,18 +11606,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { SmallVector<Value *> OpVecs; SmallVector<Type *, 2> TysForDecl; // Add return type if intrinsic is overloaded on it. - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); for (unsigned I : seq<unsigned>(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
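Stepping back to the alternate-opcode costing added earlier in this hunk: the SmallBitVector opcode mask marks which lanes of the would-be vector take the alternate opcode, and getAltInstrCost (defined for X86 earlier in this import) prices the pattern whenever isLegalAltInstr accepts it. A small standalone illustration of building such a mask for an [fadd,fsub,fadd,fsub] bundle (assumed example, not SLP code):

#include "llvm/ADT/SmallBitVector.h"
#include <cassert>

int main() {
  // Opcode0 = fadd, Opcode1 = fsub; lanes 1 and 3 use the alternate opcode.
  llvm::SmallBitVector OpcodeMask(4, false);
  OpcodeMask.set(1);
  OpcodeMask.set(3);
  assert(OpcodeMask.count() == 2); // the kind of lane pattern x86
                                   // addsub-style instructions can cover
  return 0;
}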
- if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(IID, I)) { + if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { CallInst *CEI = cast<CallInst>(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(ScalarArg->getType()); continue; } @@ -11634,7 +11629,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); - if (isVectorIntrinsicWithOverloadTypeAtArg(IID, I)) + if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(OpVec->getType()); } @@ -11654,18 +11649,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CI->getOperandBundlesAsDefs(OpBundles); Value *V = Builder.CreateCall(CF, OpVecs, OpBundles); - // The scalar argument uses an in-tree scalar so we add the new vectorized - // call to ExternalUses list to make sure that an extract will be - // generated in the future. - if (isa_and_present<Instruction>(ScalarArg)) { - if (TreeEntry *Entry = getTreeEntry(ScalarArg)) { - // Find which lane we need to extract. - unsigned FoundLane = Entry->findLaneForValue(ScalarArg); - ExternalUses.push_back( - ExternalUser(ScalarArg, cast<User>(V), FoundLane)); - } - } - propagateIRFlags(V, E->Scalars, VL0); V = FinalShuffle(V, E, VecTy, IsSigned); @@ -11877,6 +11860,7 @@ Value *BoUpSLP::vectorizeTree( DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs; SmallDenseSet<Value *, 4> UsedInserts; DenseMap<Value *, Value *> VectorCasts; + SmallDenseSet<Value *, 4> ScalarsWithNullptrUser; // Extract all of the elements with the external uses. for (const auto &ExternalUse : ExternalUses) { Value *Scalar = ExternalUse.Scalar; @@ -11947,13 +11931,27 @@ Value *BoUpSLP::vectorizeTree( VectorToInsertElement.try_emplace(Vec, IE); return Vec; }; - // If User == nullptr, the Scalar is used as extra arg. Generate - // ExtractElement instruction and update the record for this scalar in - // ExternallyUsedValues. + // If User == nullptr, the Scalar remains as scalar in vectorized + // instructions or is used as extra arg. Generate ExtractElement instruction + // and update the record for this scalar in ExternallyUsedValues. 
if (!User) { - assert(ExternallyUsedValues.count(Scalar) && - "Scalar with nullptr as an external user must be registered in " - "ExternallyUsedValues map"); + if (!ScalarsWithNullptrUser.insert(Scalar).second) + continue; + assert((ExternallyUsedValues.count(Scalar) || + any_of(Scalar->users(), + [&](llvm::User *U) { + TreeEntry *UseEntry = getTreeEntry(U); + return UseEntry && + UseEntry->State == TreeEntry::Vectorize && + E->State == TreeEntry::Vectorize && + doesInTreeUserNeedToExtract( + Scalar, + cast<Instruction>(UseEntry->Scalars.front()), + TLI); + })) && + "Scalar with nullptr User must be registered in " + "ExternallyUsedValues map or remain as scalar in vectorized " + "instructions"); if (auto *VecI = dyn_cast<Instruction>(Vec)) { if (auto *PHI = dyn_cast<PHINode>(VecI)) Builder.SetInsertPoint(PHI->getParent(), @@ -16222,7 +16220,7 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { for (auto *V : Candidates) { auto *GEP = cast<GetElementPtrInst>(V); auto *GEPIdx = GEP->idx_begin()->get(); - assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx)); + assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx)); Bundle[BundleIndex++] = GEPIdx; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 94cb76889813..7d33baac52c9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1357,6 +1357,36 @@ public: #endif }; +/// A recipe to compute the pointers for widened memory accesses of IndexTy for +/// all parts. If IsReverse is true, compute pointers for accessing the input in +/// reverse order per part. +class VPVectorPointerRecipe : public VPRecipeBase, public VPValue { + Type *IndexedTy; + bool IsReverse; + +public: + VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, + DebugLoc DL) + : VPRecipeBase(VPDef::VPVectorPointerSC, {Ptr}, DL), VPValue(this), + IndexedTy(IndexedTy), IsReverse(IsReverse) {} + + VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) + + void execute(VPTransformState &State) override; + + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A pure virtual base class for all recipes modeling header phis, including /// phis for first order recurrences, pointer inductions and reductions. The /// start value is the first operand of the recipe and the incoming value from diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 02e400d590be..76961629aece 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -498,16 +498,17 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { "DbgInfoIntrinsic should have been dropped during VPlan construction"); State.setDebugLocFrom(CI.getDebugLoc()); + bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic; FunctionType *VFTy = nullptr; if (Variant) VFTy = Variant->getFunctionType(); for (unsigned Part = 0; Part < State.UF; ++Part) { SmallVector<Type *, 2> TysForDecl; // Add return type if intrinsic is overloaded on it. 
- if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) { + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) TysForDecl.push_back( VectorType::get(CI.getType()->getScalarType(), State.VF)); - } SmallVector<Value *, 4> Args; for (const auto &I : enumerate(operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -516,18 +517,19 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { // e.g. linear parameters for pointers. Value *Arg; if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) || - (VectorIntrinsicID != Intrinsic::not_intrinsic && + (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))) Arg = State.get(I.value(), VPIteration(0, 0)); else Arg = State.get(I.value(), Part); - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) + if (UseIntrinsic && + isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) TysForDecl.push_back(Arg->getType()); Args.push_back(Arg); } Function *VectorF; - if (VectorIntrinsicID != Intrinsic::not_intrinsic) { + if (UseIntrinsic) { // Use vector version of the intrinsic. Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); @@ -1209,6 +1211,59 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPVectorPointerRecipe::execute(VPTransformState &State) { + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + for (unsigned Part = 0; Part < State.UF; ++Part) { + // Calculate the pointer for the specific unroll-part. + Value *PartPtr = nullptr; + // Use i32 for the gep index type when the value is constant, + // or query DataLayout for a more suitable index type otherwise. + const DataLayout &DL = + Builder.GetInsertBlock()->getModule()->getDataLayout(); + Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) + ? DL.getIndexType(IndexedTy->getPointerTo()) + : Builder.getInt32Ty(); + Value *Ptr = State.get(getOperand(0), VPIteration(0, 0)); + bool InBounds = false; + if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) + InBounds = GEP->isInBounds(); + if (IsReverse) { + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element.
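// Worked example of the reverse addressing below, assuming a fixed-width
// VF of 4 (so RunTimeVF = 4): the two GEPs offset the base pointer first by
// NumElt = -Part * 4 and then by LastLane = 1 - 4 = -3, so Part 0 covers
// elements [-3, 0] and Part 1 covers [-7, -4] of the reversed access.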
+ // RunTimeVF = VScale * VF.getKnownMinValue() + // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() + Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); + // NumElt = -Part * RunTimeVF + Value *NumElt = Builder.CreateMul( + ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF); + // LastLane = 1 - RunTimeVF + Value *LastLane = + Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); + PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); + PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds); + } else { + Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); + PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); + } + + State.set(this, PartPtr, Part); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = vector-pointer "; + if (IsReverse) + O << "(reverse) "; + + printOperands(O, SlotTracker); +} +#endif + void VPBlendRecipe::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); // We know that all PHIs in non-header blocks are converted into diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 116acad8e8f3..8cc98f4abf93 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -351,6 +351,7 @@ public: VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, + VPVectorPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp index fcb6392a1d95..299b7856ec0b 100644 --- a/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/llvm/tools/llvm-ar/llvm-ar.cpp @@ -1287,8 +1287,7 @@ static const char *matchFlagWithArg(StringRef Expected, ArrayRef<const char *> Args) { StringRef Arg = *ArgIt; - if (Arg.starts_with("--")) - Arg = Arg.substr(2); + Arg.consume_front("--"); size_t len = Expected.size(); if (Arg == Expected) { diff --git a/llvm/tools/llvm-diff/llvm-diff.cpp b/llvm/tools/llvm-diff/llvm-diff.cpp index 6fe18a51c9f5..3e77b1ed89b0 100644 --- a/llvm/tools/llvm-diff/llvm-diff.cpp +++ b/llvm/tools/llvm-diff/llvm-diff.cpp @@ -42,8 +42,7 @@ static std::unique_ptr<Module> readModule(LLVMContext &Context, static void diffGlobal(DifferenceEngine &Engine, Module &L, Module &R, StringRef Name) { // Drop leading sigils from the global name. 
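// The consume_front idiom applied in the llvm-ar hunk above and the
// llvm-diff hunk below is a behavior-preserving simplification: it strips
// the prefix in place and returns whether it was present, replacing the
// starts_with/substr pair, e.g.
//
//   StringRef Name = "@main";
//   Name.consume_front("@"); // Name == "main"; returns true if stripped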
- if (Name.starts_with("@")) - Name = Name.substr(1); + Name.consume_front("@"); Function *LFn = L.getFunction(Name); Function *RFn = R.getFunction(Name); diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp index 06e7ec3b9230..9ee1472bdf5c 100644 --- a/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -563,6 +563,13 @@ static inline bool inheritsFrom(InstructionContext child, case IC_EVEX_L2_W_XD_KZ_B: case IC_EVEX_L2_W_OPSIZE_KZ_B: return false; + case IC_EVEX_NF: + case IC_EVEX_B_NF: + case IC_EVEX_OPSIZE_NF: + case IC_EVEX_OPSIZE_B_NF: + case IC_EVEX_W_NF: + case IC_EVEX_W_B_NF: + return false; default: errs() << "Unknown instruction class: " << stringForContext((InstructionContext)parent) << "\n"; @@ -889,7 +896,19 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { if ((index & ATTR_EVEX) && (index & ATTR_OPSIZE) && (index & ATTR_ADSIZE)) o << "IC_EVEX_OPSIZE_ADSIZE"; - else if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) { + else if (index & ATTR_EVEXNF) { + o << "IC_EVEX"; + if (index & ATTR_REXW) + o << "_W"; + else if (index & ATTR_OPSIZE) + o << "_OPSIZE"; + + if (index & ATTR_EVEXB) + o << "_B"; + + o << "_NF"; + } else if ((index & ATTR_EVEX) || (index & ATTR_VEX) || + (index & ATTR_VEXL)) { if (index & ATTR_EVEX) o << "IC_EVEX"; else diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index d3299e281031..101b75e2f087 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -348,7 +348,9 @@ public: // memory form: broadcast if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B)) return false; - if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B)) + // EVEX_B indicates NDD for MAP4 instructions + if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) && + RegRI.OpMap != X86Local::T_MAP4) return false; if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form)) @@ -369,7 +371,8 @@ public: RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L, RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, - RegRI.HasEVEX_L2, RegRec->getValueAsBit("hasEVEX_RC"), + RegRI.HasEVEX_L2, RegRI.HasEVEX_NF, + RegRec->getValueAsBit("hasEVEX_RC"), RegRec->getValueAsBit("hasLockPrefix"), RegRec->getValueAsBit("hasNoTrackPrefix"), RegRec->getValueAsBit("EVEX_W1_VEX_W0")) != @@ -377,7 +380,8 @@ public: MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L, MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, - MemRI.HasEVEX_L2, MemRec->getValueAsBit("hasEVEX_RC"), + MemRI.HasEVEX_L2, MemRI.HasEVEX_NF, + MemRec->getValueAsBit("hasEVEX_RC"), MemRec->getValueAsBit("hasLockPrefix"), MemRec->getValueAsBit("hasNoTrackPrefix"), MemRec->getValueAsBit("EVEX_W1_VEX_W0"))) @@ -668,6 +672,14 @@ void X86FoldTablesEmitter::run(raw_ostream &O) { if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end()) continue; + // Promoted legacy instruction is in EVEX space, and has REX2-encoding + // alternative. It's added due to HW design and never emitted by compiler. 
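// Assumed reading of the check below (part of the APX promotion work):
// OpMapBits == T_MAP4 combined with explicitOpPrefixBits == ExplicitEVEX
// identifies a legacy instruction promoted into EVEX space purely for
// hardware completeness; since the compiler always emits the REX2-encoded
// alternative, no memory-fold table entry is needed for it.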
+ if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) == + X86Local::T_MAP4 && + byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) == + X86Local::ExplicitEVEX) + continue; + // - Instructions including RST register class operands are not relevant // for memory folding (for further details check the explanation in // lib/Target/X86/X86InstrFPStack.td file). diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index 47ee9544f323..fb430676c504 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -125,6 +125,7 @@ RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) { HasEVEX_K = Rec->getValueAsBit("hasEVEX_K"); HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z"); HasEVEX_B = Rec->getValueAsBit("hasEVEX_B"); + HasEVEX_NF = Rec->getValueAsBit("hasEVEX_NF"); IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); IsAsmParserOnly = Rec->getValueAsBit("isAsmParserOnly"); ForceDisassemble = Rec->getValueAsBit("ForceDisassemble"); @@ -185,6 +186,9 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables, : (HasEVEX_KZ ? n##_KZ \ : (HasEVEX_K ? n##_K : (HasEVEX_B ? n##_B : n))))) +#define EVEX_NF(n) (HasEVEX_NF ? n##_NF : n) +#define EVEX_B_NF(n) (HasEVEX_B ? EVEX_NF(n##_B) : EVEX_NF(n)) + InstructionContext RecognizableInstr::insnContext() const { InstructionContext insnContext; @@ -193,8 +197,15 @@ InstructionContext RecognizableInstr::insnContext() const { errs() << "Don't support VEX.L if EVEX_L2 is enabled: " << Name << "\n"; llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled"); } - // VEX_L & VEX_W - if (!EncodeRC && HasVEX_L && HasREX_W) { + if (HasEVEX_NF) { + if (OpPrefix == X86Local::PD) + insnContext = EVEX_B_NF(IC_EVEX_OPSIZE); + else if (HasREX_W) + insnContext = EVEX_B_NF(IC_EVEX_W); + else + insnContext = EVEX_B_NF(IC_EVEX); + } else if (!EncodeRC && HasVEX_L && HasREX_W) { + // VEX_L & VEX_W if (OpPrefix == X86Local::PD) insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE); else if (OpPrefix == X86Local::XS) @@ -486,6 +497,7 @@ void RecognizableInstr::emitInstructionSpecifier() { ++additionalOperands; #endif + bool IsND = OpMap == X86Local::T_MAP4 && HasEVEX_B && HasVEX_4V; switch (Form) { default: llvm_unreachable("Unhandled form"); @@ -536,11 +548,14 @@ void RecognizableInstr::emitInstructionSpecifier() { numPhysicalOperands <= 3 + additionalOperands && "Unexpected number of operands for MRMDestReg"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) + HANDLE_OPERAND(rmRegister) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) @@ -570,12 +585,15 @@ void RecognizableInstr::emitInstructionSpecifier() { numPhysicalOperands <= 3 + additionalOperands && "Unexpected number of operands for MRMDestMemFrm with VEX_4V"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) + HANDLE_OPERAND(memory) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) @@ -594,12 +612,15 @@ void RecognizableInstr::emitInstructionSpecifier() { numPhysicalOperands <= 4 + additionalOperands && "Unexpected number of operands for MRMSrcRegFrm"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) + 
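// Assumed operand decode order for an APX NDD (IsND) MRMSrcReg/MRMSrcMem
// form after this reordering, versus a classic VEX_4V form:
//
//   IsND:   VVVV (new dest), ModRM.reg, [writemask], ModRM.rm/mem, ...
//   VEX_4V: ModRM.reg, [writemask], VVVV, ModRM.rm/mem, ...
//
// EVEX.VVVV moves to the front because it now encodes the extra destination
// operand rather than a source.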
HANDLE_OPERAND(roRegister) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) @@ -641,13 +662,15 @@ void RecognizableInstr::emitInstructionSpecifier() { assert(numPhysicalOperands >= 2 + additionalOperands && numPhysicalOperands <= 4 + additionalOperands && "Unexpected number of operands for MRMSrcMemFrm"); + if (IsND) + HANDLE_OPERAND(vvvvRegister) HANDLE_OPERAND(roRegister) if (HasEVEX_K) HANDLE_OPERAND(writemaskRegister) - if (HasVEX_4V) + if (!IsND && HasVEX_4V) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) @@ -1216,6 +1239,8 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s, OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s, uint8_t OpSize) { + ENCODING("GR8", ENCODING_VVVV) + ENCODING("GR16", ENCODING_VVVV) ENCODING("GR32", ENCODING_VVVV) ENCODING("GR64", ENCODING_VVVV) ENCODING("FR32", ENCODING_VVVV) diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h index 61ad5e32b3fb..007c700cdfaf 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/llvm/utils/TableGen/X86RecognizableInstr.h @@ -172,7 +172,7 @@ enum { PD = 1, XS = 2, XD = 3, PS = 4 }; enum { VEX = 1, XOP = 2, EVEX = 3 }; enum { OpSize16 = 1, OpSize32 = 2 }; enum { AdSize16 = 1, AdSize32 = 2, AdSize64 = 3 }; -enum { ExplicitREX2 = 1 }; +enum { ExplicitREX2 = 1, ExplicitEVEX = 3 }; } // namespace X86Local namespace X86Disassembler { @@ -212,6 +212,8 @@ struct RecognizableInstrBase { bool HasEVEX_KZ; /// The hasEVEX_B field from the record bool HasEVEX_B; + /// The hasEVEX_NF field from the record + bool HasEVEX_NF; /// Indicates that the instruction uses the L and L' fields for RC. bool EncodeRC; /// The isCodeGenOnly field from the record
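// Illustrative expansion of the NF context macros added in
// X86RecognizableInstr.cpp, assuming an OPSIZE instruction with both
// HasEVEX_B and HasEVEX_NF set:
//
//   EVEX_B_NF(IC_EVEX_OPSIZE)
//     -> EVEX_NF(IC_EVEX_OPSIZE_B) // HasEVEX_B picks the _B variant
//     -> IC_EVEX_OPSIZE_B_NF       // HasEVEX_NF appends _NF
//
// matching the new IC_EVEX_*_NF contexts accepted by inheritsFrom() in
// X86DisassemblerTables.cpp.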