author | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:39:22 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:39:22 +0000 |
commit | 9f93bc8bfd2690abd12a830e42a1c26038173ae5 (patch) | |
tree | 397432940f35d42b709d99d1d23523e7a24296c4 | |
parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) |
Vendor import of llvm-project branch release/12.x
llvmorg-12.0.0-rc1-109-gd5d089bf08c9.
(tag: vendor/llvm-project/llvmorg-12.0.0-rc1-109-gd5d089bf08c9)
110 files changed, 1966 insertions, 1050 deletions
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ce47d54e44b0..ae69a68608b7 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -538,6 +538,9 @@ private:
   /// need them (like static local vars).
   llvm::MapVector<const NamedDecl *, unsigned> MangleNumbers;
   llvm::MapVector<const VarDecl *, unsigned> StaticLocalNumbers;
+  /// Mapping the associated device lambda mangling number if present.
+  mutable llvm::DenseMap<const CXXRecordDecl *, unsigned>
+      DeviceLambdaManglingNumbers;

   /// Mapping that stores parameterIndex values for ParmVarDecls when
   /// that value exceeds the bitfield size of ParmVarDeclBits.ParameterIndex.
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index e32101bb2276..89006b1cfa7f 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -1735,6 +1735,12 @@ public:
     getLambdaData().HasKnownInternalLinkage = HasKnownInternalLinkage;
   }

+  /// Set the device side mangling number.
+  void setDeviceLambdaManglingNumber(unsigned Num) const;
+
+  /// Retrieve the device side mangling number.
+  unsigned getDeviceLambdaManglingNumber() const;
+
   /// Returns the inheritance model used for this record.
   MSInheritanceModel getMSInheritanceModel() const;
diff --git a/clang/include/clang/AST/Mangle.h b/clang/include/clang/AST/Mangle.h
index 0e8d6dd53d8a..7b6495d85eb6 100644
--- a/clang/include/clang/AST/Mangle.h
+++ b/clang/include/clang/AST/Mangle.h
@@ -96,6 +96,9 @@ public:
   virtual bool shouldMangleCXXName(const NamedDecl *D) = 0;
   virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0;

+  virtual bool isDeviceMangleContext() const { return false; }
+  virtual void setDeviceMangleContext(bool) {}
+
   // FIXME: consider replacing raw_ostream & with something like SmallString &.
   void mangleName(GlobalDecl GD, raw_ostream &);
   virtual void mangleCXXName(GlobalDecl GD, raw_ostream &) = 0;
diff --git a/clang/include/clang/AST/MangleNumberingContext.h b/clang/include/clang/AST/MangleNumberingContext.h
index f1ca6a05dbaf..eb33759682d6 100644
--- a/clang/include/clang/AST/MangleNumberingContext.h
+++ b/clang/include/clang/AST/MangleNumberingContext.h
@@ -52,6 +52,11 @@ public:
   /// this context.
   virtual unsigned getManglingNumber(const TagDecl *TD,
                                      unsigned MSLocalManglingNumber) = 0;
+
+  /// Retrieve the mangling number of a new lambda expression with the
+  /// given call operator within the device context. No device number is
+  /// assigned if there's no device numbering context is associated.
+  virtual unsigned getDeviceManglingNumber(const CXXMethodDecl *) { return 0; }
 };

 } // end namespace clang
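Annotation (reviewer's note, not part of the imported patch): the four header hunks above add a second, device-side lambda numbering. A hedged sketch of why one number is not enough — the patch's own `mangleLambda` comment further down is the authority; the function and lambdas here are invented for illustration:

```cpp
// The MS ABI numbers lambdas sequentially per enclosing context, while the
// Itanium ABI numbers them per call-operator signature, so the two ABIs can
// disagree about "which lambda this is" within one function:
void f() {
  auto a = [] {};       // MS: <lambda_1>; Itanium: 1st lambda with sig ()
  auto b = [](int) {};  // MS: <lambda_2>; Itanium: 1st lambda with sig (int)
  auto c = [](int) {};  // MS: <lambda_3>; Itanium: 2nd lambda with sig (int)
}
// A CUDA/HIP build with an MSVC host and an Itanium device therefore records
// both numbers on the closure type (DeviceLambdaManglingNumbers above).
```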
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 505ea700fd0e..7870cea198a7 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -186,6 +186,9 @@ public:
   /// code, e.g., implicit constructors and destructors.
   bool shouldVisitImplicitCode() const { return false; }

+  /// Return whether this visitor should recurse into lambda body
+  bool shouldVisitLambdaBody() const { return true; }
+
   /// Return whether this visitor should traverse post-order.
   bool shouldTraversePostOrder() const { return false; }

@@ -2057,6 +2060,15 @@ bool RecursiveASTVisitor<Derived>::TraverseFunctionHelper(FunctionDecl *D) {
   // by clang.
       (!D->isDefaulted() || getDerived().shouldVisitImplicitCode());
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
+    if (const CXXRecordDecl *RD = MD->getParent()) {
+      if (RD->isLambda() &&
+          declaresSameEntity(RD->getLambdaCallOperator(), MD)) {
+        VisitBody = VisitBody && getDerived().shouldVisitLambdaBody();
+      }
+    }
+  }
+
   if (VisitBody) {
     TRY_TO(TraverseStmt(D->getBody())); // Function body.
   }
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 6f6dfab59a39..031fa4682c3a 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -344,7 +344,7 @@ extern const internal::VariadicAllOfMatcher<Decl> decl;
 ///   int number = 42;
 ///   auto [foo, bar] = std::make_pair{42, 42};
 /// \endcode
-extern const internal::VariadicAllOfMatcher<DecompositionDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, DecompositionDecl>
     decompositionDecl;

 /// Matches a declaration of a linkage specification.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 42c5319041d0..1f6c13d5cc96 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1147,7 +1147,7 @@ def fprofile_update_EQ : Joined<["-"], "fprofile-update=">,
 defm pseudo_probe_for_profiling : BoolFOption<"pseudo-probe-for-profiling",
   CodeGenOpts<"PseudoProbeForProfiling">, DefaultFalse,
   PosFlag<SetTrue, [], "Emit">, NegFlag<SetFalse, [], "Do not emit">,
-  BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiler">>;
+  BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiling">>;
 def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">,
   Group<f_Group>, Flags<[CC1Option, CoreOption]>,
   HelpText<"Generate instrumented code to collect order file into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/include/clang/Lex/VariadicMacroSupport.h b/clang/include/clang/Lex/VariadicMacroSupport.h
index 989e0ac703c9..119f02201fc6 100644
--- a/clang/include/clang/Lex/VariadicMacroSupport.h
+++ b/clang/include/clang/Lex/VariadicMacroSupport.h
@@ -39,17 +39,14 @@ namespace clang {
       assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned "
                                                "outside an ISO C/C++ variadic "
                                                "macro definition!");
-      assert(
-          !Ident__VA_OPT__ ||
-          (Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!"));
+      assert(Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!");
     }

     /// Client code should call this function just before the Preprocessor is
     /// about to Lex tokens from the definition of a variadic (ISO C/C++) macro.
     void enterScope() {
       Ident__VA_ARGS__->setIsPoisoned(false);
-      if (Ident__VA_OPT__)
-        Ident__VA_OPT__->setIsPoisoned(false);
+      Ident__VA_OPT__->setIsPoisoned(false);
     }

     /// Client code should call this function as soon as the Preprocessor has
@@ -58,8 +55,7 @@ namespace clang {
     /// (might be explicitly called, and then reinvoked via the destructor).
     void exitScope() {
       Ident__VA_ARGS__->setIsPoisoned(true);
-      if (Ident__VA_OPT__)
-        Ident__VA_OPT__->setIsPoisoned(true);
+      Ident__VA_OPT__->setIsPoisoned(true);
     }

     ~VariadicMacroScopeGuard() { exitScope(); }
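Annotation (mine, not part of the patch): together with the Preprocessor.cpp hunk further down, the VariadicMacroSupport.h change makes `__VA_OPT__` a reserved variadic-macro identifier in every language mode rather than only C++20. A minimal sketch of the token's behavior, assuming C++20 (or Clang's extension in older modes):

```cpp
#include <cstdio>

// __VA_OPT__(tokens) expands to its tokens only when __VA_ARGS__ is
// non-empty, avoiding the trailing-comma problem without ", ## __VA_ARGS__".
#define LOG(fmt, ...) std::printf(fmt __VA_OPT__(,) __VA_ARGS__)

int main() {
  LOG("hello\n");   // expands to std::printf("hello\n");
  LOG("%d\n", 42);  // expands to std::printf("%d\n", 42);
}
```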
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 7f7c84eb1b1d..2530a2776373 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6558,7 +6558,7 @@ public:
   /// Number lambda for linkage purposes if necessary.
   void handleLambdaNumbering(
       CXXRecordDecl *Class, CXXMethodDecl *Method,
-      Optional<std::tuple<unsigned, bool, Decl *>> Mangling = None);
+      Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling = None);

   /// Endow the lambda scope info with the relevant properties.
   void buildLambdaScope(sema::LambdaScopeInfo *LSI,
@@ -11948,8 +11948,8 @@ public:
   ///   if (diagIfOpenMPDeviceCode(Loc, diag::err_vla_unsupported))
   ///     return ExprError();
   ///   // Otherwise, continue parsing as normal.
-  SemaDiagnosticBuilder diagIfOpenMPDeviceCode(SourceLocation Loc,
-                                               unsigned DiagID);
+  SemaDiagnosticBuilder
+  diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID, FunctionDecl *FD);

   /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
   /// context is "used as host code".
@@ -11965,17 +11965,19 @@ public:
   ///     return ExprError();
   ///   // Otherwise, continue parsing as normal.
   SemaDiagnosticBuilder diagIfOpenMPHostCode(SourceLocation Loc,
-                                             unsigned DiagID);
+                                             unsigned DiagID, FunctionDecl *FD);

-  SemaDiagnosticBuilder targetDiag(SourceLocation Loc, unsigned DiagID);
+  SemaDiagnosticBuilder targetDiag(SourceLocation Loc, unsigned DiagID,
+                                   FunctionDecl *FD = nullptr);
   SemaDiagnosticBuilder targetDiag(SourceLocation Loc,
-                                   const PartialDiagnostic &PD) {
-    return targetDiag(Loc, PD.getDiagID()) << PD;
+                                   const PartialDiagnostic &PD,
+                                   FunctionDecl *FD = nullptr) {
+    return targetDiag(Loc, PD.getDiagID(), FD) << PD;
   }

   /// Check if the expression is allowed to be used in expressions for the
   /// offloading devices.
-  void checkDeviceDecl(const ValueDecl *D, SourceLocation Loc);
+  void checkDeviceDecl(ValueDecl *D, SourceLocation Loc);

   enum CUDAFunctionTarget {
     CFT_Device,
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 085c50c0667b..0d723fbbcd8c 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -2848,6 +2848,8 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
         return CDeclOrErr.takeError();
       D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr,
                                DCXX->hasKnownLambdaInternalLinkage());
+      D2CXX->setDeviceLambdaManglingNumber(
+          DCXX->getDeviceLambdaManglingNumber());
     } else if (DCXX->isInjectedClassName()) {
       // We have to be careful to do a similar dance to the one in
       // Sema::ActOnStartCXXMemberDeclarations
diff --git a/clang/lib/AST/CXXABI.h b/clang/lib/AST/CXXABI.h
index 31cb36918726..ca9424bcb7a4 100644
--- a/clang/lib/AST/CXXABI.h
+++ b/clang/lib/AST/CXXABI.h
@@ -22,8 +22,9 @@ class ASTContext;
 class CXXConstructorDecl;
 class DeclaratorDecl;
 class Expr;
-class MemberPointerType;
+class MangleContext;
 class MangleNumberingContext;
+class MemberPointerType;

 /// Implements C++ ABI-specific semantic analysis functions.
 class CXXABI {
@@ -75,6 +76,8 @@ public:
 /// Creates an instance of a C++ ABI class.
 CXXABI *CreateItaniumCXXABI(ASTContext &Ctx);
 CXXABI *CreateMicrosoftCXXABI(ASTContext &Ctx);
+std::unique_ptr<MangleNumberingContext>
+createItaniumNumberingContext(MangleContext *);
 }

 #endif
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 0368ada0b81c..0375f9b4432e 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -1593,6 +1593,20 @@ Decl *CXXRecordDecl::getLambdaContextDecl() const {
   return getLambdaData().ContextDecl.get(Source);
 }

+void CXXRecordDecl::setDeviceLambdaManglingNumber(unsigned Num) const {
+  assert(isLambda() && "Not a lambda closure type!");
+  if (Num)
+    getASTContext().DeviceLambdaManglingNumbers[this] = Num;
+}
+
+unsigned CXXRecordDecl::getDeviceLambdaManglingNumber() const {
+  assert(isLambda() && "Not a lambda closure type!");
+  auto I = getASTContext().DeviceLambdaManglingNumbers.find(this);
+  if (I != getASTContext().DeviceLambdaManglingNumbers.end())
+    return I->second;
+  return 0;
+}
+
 static CanQualType GetConversionType(ASTContext &Context, NamedDecl *Conv) {
   QualType T =
       cast<CXXConversionDecl>(Conv->getUnderlyingDecl()->getAsFunction())
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 56181bbe1166..1bdad771a923 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -3497,8 +3497,8 @@ static bool diagnoseMutableFields(EvalInfo &Info, const Expr *E, AccessKinds AK,
 static bool lifetimeStartedInEvaluation(EvalInfo &Info,
                                         APValue::LValueBase Base,
                                         bool MutableSubobject = false) {
-  // A temporary we created.
-  if (Base.getCallIndex())
+  // A temporary or transient heap allocation we created.
+  if (Base.getCallIndex() || Base.is<DynamicAllocLValue>())
     return true;

   switch (Info.IsEvaluatingDecl) {
@@ -10009,6 +10009,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
   auto *CaptureInitIt = E->capture_init_begin();
   const LambdaCapture *CaptureIt = ClosureClass->captures_begin();
   bool Success = true;
+  const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(ClosureClass);
   for (const auto *Field : ClosureClass->fields()) {
     assert(CaptureInitIt != E->capture_init_end());
     // Get the initializer for this field
@@ -10019,8 +10020,13 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
     if (!CurFieldInit)
       return Error(E);

+    LValue Subobject = This;
+
+    if (!HandleLValueMember(Info, E, Subobject, Field, &Layout))
+      return false;
+
     APValue &FieldVal = Result.getStructField(Field->getFieldIndex());
-    if (!EvaluateInPlace(FieldVal, Info, This, CurFieldInit)) {
+    if (!EvaluateInPlace(FieldVal, Info, Subobject, CurFieldInit)) {
       if (!Info.keepEvaluatingAfterFailure())
         return false;
       Success = false;
@@ -14786,11 +14792,14 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx,

 static bool EvaluateDestruction(const ASTContext &Ctx, APValue::LValueBase Base,
                                 APValue DestroyedValue, QualType Type,
-                                SourceLocation Loc, Expr::EvalStatus &EStatus) {
-  EvalInfo Info(Ctx, EStatus, EvalInfo::EM_ConstantExpression);
+                                SourceLocation Loc, Expr::EvalStatus &EStatus,
+                                bool IsConstantDestruction) {
+  EvalInfo Info(Ctx, EStatus,
+                IsConstantDestruction ? EvalInfo::EM_ConstantExpression
+                                      : EvalInfo::EM_ConstantFold);
   Info.setEvaluatingDecl(Base, DestroyedValue,
                          EvalInfo::EvaluatingDeclKind::Dtor);
-  Info.InConstantContext = true;
+  Info.InConstantContext = IsConstantDestruction;

   LValue LVal;
   LVal.set(Base);
@@ -14844,7 +14853,8 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx,
   // If this is a class template argument, it's required to have constant
   // destruction too.
   if (Kind == ConstantExprKind::ClassTemplateArgument &&
-      (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result) ||
+      (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result,
+                            true) ||
        Result.HasSideEffects)) {
     // FIXME: Prefix a note to indicate that the problem is lack of constant
     // destruction.
@@ -14910,6 +14920,10 @@ bool VarDecl::evaluateDestruction(
   Expr::EvalStatus EStatus;
   EStatus.Diag = &Notes;

+  // Only treat the destruction as constant destruction if we formally have
+  // constant initialization (or are usable in a constant expression).
+  bool IsConstantDestruction = hasConstantInitialization();
+
   // Make a copy of the value for the destructor to mutate, if we know it.
   // Otherwise, treat the value as default-initialized; if the destructor works
   // anyway, then the destruction is constant (and must be essentially empty).
@@ -14920,7 +14934,8 @@ bool VarDecl::evaluateDestruction(
     return false;

   if (!EvaluateDestruction(getASTContext(), this, std::move(DestroyedValue),
-                           getType(), getLocation(), EStatus) ||
+                           getType(), getLocation(), EStatus,
+                           IsConstantDestruction) ||
       EStatus.HasSideEffects)
     return false;
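Annotation (mine): the EvaluateDestruction change gates "constant destruction" on the variable formally having constant initialization. A hedged C++20 sketch of the distinction — names are invented:

```cpp
struct Guard {
  int *p;
  constexpr Guard(int *q) : p(q) {}
  constexpr ~Guard() {}    // C++20 constexpr destructor
};

int x;
constexpr Guard a{&x};     // constant-initialized: destruction is checked as
                           // a constant expression (EM_ConstantExpression)
Guard b{new int};          // dynamically initialized: destruction is now only
                           // constant-folded (EM_ConstantFold), not required
                           // to be a constant expression
```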
diff --git a/clang/lib/AST/ItaniumCXXABI.cpp b/clang/lib/AST/ItaniumCXXABI.cpp
index 069add8464ae..be10258a2d77 100644
--- a/clang/lib/AST/ItaniumCXXABI.cpp
+++ b/clang/lib/AST/ItaniumCXXABI.cpp
@@ -258,3 +258,9 @@ public:
 CXXABI *clang::CreateItaniumCXXABI(ASTContext &Ctx) {
   return new ItaniumCXXABI(Ctx);
 }
+
+std::unique_ptr<MangleNumberingContext>
+clang::createItaniumNumberingContext(MangleContext *Mangler) {
+  return std::make_unique<ItaniumNumberingContext>(
+      cast<ItaniumMangleContext>(Mangler));
+}
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 6c8d5687c64a..5cad84a96845 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -125,6 +125,8 @@ class ItaniumMangleContextImpl : public ItaniumMangleContext {
   llvm::DenseMap<DiscriminatorKeyTy, unsigned> Discriminator;
   llvm::DenseMap<const NamedDecl*, unsigned> Uniquifier;

+  bool IsDevCtx = false;
+
 public:
   explicit ItaniumMangleContextImpl(ASTContext &Context,
                                     DiagnosticsEngine &Diags)
@@ -137,6 +139,10 @@ public:
   bool shouldMangleStringLiteral(const StringLiteral *) override {
     return false;
   }
+
+  bool isDeviceMangleContext() const override { return IsDevCtx; }
+  void setDeviceMangleContext(bool IsDev) override { IsDevCtx = IsDev; }
+
   void mangleCXXName(GlobalDecl GD, raw_ostream &) override;
   void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk,
                    raw_ostream &) override;
@@ -546,8 +552,8 @@ private:
                             unsigned knownArity);
   void mangleCastExpression(const Expr *E, StringRef CastEncoding);
   void mangleInitListElements(const InitListExpr *InitList);
-  void mangleDeclRefExpr(const NamedDecl *D);
-  void mangleExpression(const Expr *E, unsigned Arity = UnknownArity);
+  void mangleExpression(const Expr *E, unsigned Arity = UnknownArity,
+                        bool AsTemplateArg = false);
   void mangleCXXCtorType(CXXCtorType T, const CXXRecordDecl *InheritedFrom);
   void mangleCXXDtorType(CXXDtorType T);

@@ -558,6 +564,7 @@ private:
                           unsigned NumTemplateArgs);
   void mangleTemplateArgs(TemplateName TN, const TemplateArgumentList &AL);
   void mangleTemplateArg(TemplateArgument A, bool NeedExactType);
+  void mangleTemplateArgExpr(const Expr *E);
   void mangleValueInTemplateArg(QualType T, const APValue &V, bool TopLevel,
                                 bool NeedExactType = false);
@@ -726,9 +733,17 @@ void CXXNameMangler::mangleFunctionEncodingBareType(const FunctionDecl *FD) {
       EnableIfAttr *EIA = dyn_cast<EnableIfAttr>(*I);
       if (!EIA)
         continue;
-      Out << 'X';
-      mangleExpression(EIA->getCond());
-      Out << 'E';
+      if (Context.getASTContext().getLangOpts().getClangABICompat() >
+          LangOptions::ClangABI::Ver11) {
+        mangleTemplateArgExpr(EIA->getCond());
+      } else {
+        // Prior to Clang 12, we hardcoded the X/E around enable-if's argument,
+        // even though <template-arg> should not include an X/E around
+        // <expr-primary>.
+        Out << 'X';
+        mangleExpression(EIA->getCond());
+        Out << 'E';
+      }
     }
     Out << 'E';
     FunctionTypeDepth.pop(Saved);
@@ -1837,7 +1852,15 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) {
   //   (in lexical order) with that same <lambda-sig> and context.
   //
   // The AST keeps track of the number for us.
-  unsigned Number = Lambda->getLambdaManglingNumber();
+  //
+  // In CUDA/HIP, to ensure the consistent lamba numbering between the device-
+  // and host-side compilations, an extra device mangle context may be created
+  // if the host-side CXX ABI has different numbering for lambda. In such case,
+  // if the mangle context is that device-side one, use the device-side lambda
+  // mangling number for this lambda.
+  unsigned Number = Context.isDeviceMangleContext()
+                        ? Lambda->getDeviceLambdaManglingNumber()
+                        : Lambda->getLambdaManglingNumber();
   assert(Number > 0 && "Lambda should be mangled as an unnamed class");
   if (Number > 1)
     mangleNumber(Number - 2);
@@ -3528,8 +3551,8 @@ void CXXNameMangler::mangleType(const DependentSizedMatrixType *T) {
   Out << "u" << VendorQualifier.size() << VendorQualifier;
   Out << "I";
-  mangleTemplateArg(T->getRowExpr(), false);
-  mangleTemplateArg(T->getColumnExpr(), false);
+  mangleTemplateArgExpr(T->getRowExpr());
+  mangleTemplateArgExpr(T->getColumnExpr());
   mangleType(T->getElementType());
   Out << "E";
 }
@@ -3871,33 +3894,8 @@ void CXXNameMangler::mangleInitListElements(const InitListExpr *InitList) {
     mangleExpression(InitList->getInit(i));
 }

-void CXXNameMangler::mangleDeclRefExpr(const NamedDecl *D) {
-  switch (D->getKind()) {
-  default:
-    //  <expr-primary> ::= L <mangled-name> E # external name
-    Out << 'L';
-    mangle(D);
-    Out << 'E';
-    break;
-
-  case Decl::ParmVar:
-    mangleFunctionParam(cast<ParmVarDecl>(D));
-    break;
-
-  case Decl::EnumConstant: {
-    const EnumConstantDecl *ED = cast<EnumConstantDecl>(D);
-    mangleIntegerLiteral(ED->getType(), ED->getInitVal());
-    break;
-  }
-
-  case Decl::NonTypeTemplateParm:
-    const NonTypeTemplateParmDecl *PD = cast<NonTypeTemplateParmDecl>(D);
-    mangleTemplateParameter(PD->getDepth(), PD->getIndex());
-    break;
-  }
-}
-
-void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
+void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
+                                      bool AsTemplateArg) {
   // <expression> ::= <unary operator-name> <expression>
   //              ::= <binary operator-name> <expression> <expression>
   //              ::= <trinary operator-name> <expression> <expression> <expression>
@@ -3911,18 +3909,64 @@
   //              ::= at <type>                # alignof (a type)
   //              ::= <template-param>
   //              ::= <function-param>
+  //              ::= fpT                      # 'this' expression (part of <function-param>)
   //              ::= sr <type> <unqualified-name>                   # dependent name
   //              ::= sr <type> <unqualified-name> <template-args>   # dependent template-id
   //              ::= ds <expression> <expression>                   # expr.*expr
   //              ::= sZ <template-param>                            # size of a parameter pack
   //              ::= sZ <function-param>                            # size of a function parameter pack
+  //              ::= u <source-name> <template-arg>* E              # vendor extended expression
   //              ::= <expr-primary>
   // <expr-primary> ::= L <type> <value number> E    # integer literal
-  //                ::= L <type <value float> E      # floating literal
+  //                ::= L <type> <value float> E     # floating literal
+  //                ::= L <type> <string type> E     # string literal
+  //                ::= L <nullptr type> E           # nullptr literal "LDnE"
+  //                ::= L <pointer type> 0 E         # null pointer template argument
+  //                ::= L <type> <real-part float> _ <imag-part float> E # complex floating point literal (C99); not used by clang
   //                ::= L <mangled-name> E           # external name
-  //                ::= fpT                          # 'this' expression
   QualType ImplicitlyConvertedToType;

+  // A top-level expression that's not <expr-primary> needs to be wrapped in
+  // X...E in a template arg.
+  bool IsPrimaryExpr = true;
+  auto NotPrimaryExpr = [&] {
+    if (AsTemplateArg && IsPrimaryExpr)
+      Out << 'X';
+    IsPrimaryExpr = false;
+  };
+
+  auto MangleDeclRefExpr = [&](const NamedDecl *D) {
+    switch (D->getKind()) {
+    default:
+      //  <expr-primary> ::= L <mangled-name> E # external name
+      Out << 'L';
+      mangle(D);
+      Out << 'E';
+      break;
+
+    case Decl::ParmVar:
+      NotPrimaryExpr();
+      mangleFunctionParam(cast<ParmVarDecl>(D));
+      break;
+
+    case Decl::EnumConstant: {
+      // <expr-primary>
+      const EnumConstantDecl *ED = cast<EnumConstantDecl>(D);
+      mangleIntegerLiteral(ED->getType(), ED->getInitVal());
+      break;
+    }
+
+    case Decl::NonTypeTemplateParm:
+      NotPrimaryExpr();
+      const NonTypeTemplateParmDecl *PD = cast<NonTypeTemplateParmDecl>(D);
+      mangleTemplateParameter(PD->getDepth(), PD->getIndex());
+      break;
+    }
+  };
+
+  // 'goto recurse' is used when handling a simple "unwrapping" node which
+  // produces no output, where ImplicitlyConvertedToType and AsTemplateArg need
+  // to be preserved.
 recurse:
   switch (E->getStmtClass()) {
   case Expr::NoStmtClass:
@@ -3994,6 +4038,7 @@

   case Expr::SourceLocExprClass:
   case Expr::BuiltinBitCastExprClass: {
+    NotPrimaryExpr();
     if (!NullOut) {
       // As bad as this diagnostic is, it's better than crashing.
       DiagnosticsEngine &Diags = Context.getDiags();
@@ -4001,33 +4046,48 @@
                                        "cannot yet mangle expression type %0");
       Diags.Report(E->getExprLoc(), DiagID)
           << E->getStmtClassName() << E->getSourceRange();
+      return;
     }
     break;
   }

   case Expr::CXXUuidofExprClass: {
+    NotPrimaryExpr();
     const CXXUuidofExpr *UE = cast<CXXUuidofExpr>(E);
-    if (UE->isTypeOperand()) {
-      QualType UuidT = UE->getTypeOperand(Context.getASTContext());
-      Out << "u8__uuidoft";
-      mangleType(UuidT);
+    // As of clang 12, uuidof uses the vendor extended expression
+    // mangling. Previously, it used a special-cased nonstandard extension.
+    if (Context.getASTContext().getLangOpts().getClangABICompat() >
+        LangOptions::ClangABI::Ver11) {
+      Out << "u8__uuidof";
+      if (UE->isTypeOperand())
+        mangleType(UE->getTypeOperand(Context.getASTContext()));
+      else
+        mangleTemplateArgExpr(UE->getExprOperand());
+      Out << 'E';
     } else {
-      Expr *UuidExp = UE->getExprOperand();
-      Out << "u8__uuidofz";
-      mangleExpression(UuidExp, Arity);
+      if (UE->isTypeOperand()) {
+        QualType UuidT = UE->getTypeOperand(Context.getASTContext());
+        Out << "u8__uuidoft";
+        mangleType(UuidT);
+      } else {
+        Expr *UuidExp = UE->getExprOperand();
+        Out << "u8__uuidofz";
+        mangleExpression(UuidExp);
+      }
     }
     break;
   }

   // Even gcc-4.5 doesn't mangle this.
   case Expr::BinaryConditionalOperatorClass: {
+    NotPrimaryExpr();
     DiagnosticsEngine &Diags = Context.getDiags();
     unsigned DiagID =
         Diags.getCustomDiagID(DiagnosticsEngine::Error,
             "?: operator with omitted middle operand cannot be mangled");
     Diags.Report(E->getExprLoc(), DiagID)
         << E->getStmtClassName() << E->getSourceRange();
-    break;
+    return;
   }

   // These are used for internal purposes and cannot be meaningfully mangled.
@@ -4035,6 +4095,7 @@
     llvm_unreachable("cannot mangle opaque value; mangling wrong thing?");

   case Expr::InitListExprClass: {
+    NotPrimaryExpr();
     Out << "il";
     mangleInitListElements(cast<InitListExpr>(E));
     Out << "E";
@@ -4042,6 +4103,7 @@
   }

   case Expr::DesignatedInitExprClass: {
+    NotPrimaryExpr();
     auto *DIE = cast<DesignatedInitExpr>(E);
     for (const auto &Designator : DIE->designators()) {
       if (Designator.isFieldDesignator()) {
@@ -4063,27 +4125,27 @@
   }

   case Expr::CXXDefaultArgExprClass:
-    mangleExpression(cast<CXXDefaultArgExpr>(E)->getExpr(), Arity);
-    break;
+    E = cast<CXXDefaultArgExpr>(E)->getExpr();
+    goto recurse;

   case Expr::CXXDefaultInitExprClass:
-    mangleExpression(cast<CXXDefaultInitExpr>(E)->getExpr(), Arity);
-    break;
+    E = cast<CXXDefaultInitExpr>(E)->getExpr();
+    goto recurse;

   case Expr::CXXStdInitializerListExprClass:
-    mangleExpression(cast<CXXStdInitializerListExpr>(E)->getSubExpr(), Arity);
-    break;
+    E = cast<CXXStdInitializerListExpr>(E)->getSubExpr();
+    goto recurse;

   case Expr::SubstNonTypeTemplateParmExprClass:
-    mangleExpression(cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement(),
-                     Arity);
-    break;
+    E = cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement();
+    goto recurse;

   case Expr::UserDefinedLiteralClass:
     // We follow g++'s approach of mangling a UDL as a call to the literal
     // operator.
   case Expr::CXXMemberCallExprClass: // fallthrough
   case Expr::CallExprClass: {
+    NotPrimaryExpr();
     const CallExpr *CE = cast<CallExpr>(E);

     // <expression> ::= cp <simple-id> <expression>* E
@@ -4114,6 +4176,7 @@
   }

   case Expr::CXXNewExprClass: {
+    NotPrimaryExpr();
     const CXXNewExpr *New = cast<CXXNewExpr>(E);
     if (New->isGlobalNew()) Out << "gs";
     Out << (New->isArray() ? "na" : "nw");
@@ -4149,6 +4212,7 @@
   }

   case Expr::CXXPseudoDestructorExprClass: {
+    NotPrimaryExpr();
     const auto *PDE = cast<CXXPseudoDestructorExpr>(E);
     if (const Expr *Base = PDE->getBase())
       mangleMemberExprBase(Base, PDE->isArrow());
@@ -4175,6 +4239,7 @@
   }

   case Expr::MemberExprClass: {
+    NotPrimaryExpr();
     const MemberExpr *ME = cast<MemberExpr>(E);
     mangleMemberExpr(ME->getBase(), ME->isArrow(),
                      ME->getQualifier(), nullptr,
@@ -4185,6 +4250,7 @@
   }

   case Expr::UnresolvedMemberExprClass: {
+    NotPrimaryExpr();
     const UnresolvedMemberExpr *ME = cast<UnresolvedMemberExpr>(E);
     mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
                      ME->isArrow(), ME->getQualifier(), nullptr,
@@ -4195,6 +4261,7 @@
   }

   case Expr::CXXDependentScopeMemberExprClass: {
+    NotPrimaryExpr();
     const CXXDependentScopeMemberExpr *ME =
         cast<CXXDependentScopeMemberExpr>(E);
     mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
@@ -4207,6 +4274,7 @@
   }

   case Expr::UnresolvedLookupExprClass: {
+    NotPrimaryExpr();
     const UnresolvedLookupExpr *ULE = cast<UnresolvedLookupExpr>(E);
     mangleUnresolvedName(ULE->getQualifier(), ULE->getName(),
                          ULE->getTemplateArgs(), ULE->getNumTemplateArgs(),
@@ -4215,6 +4283,7 @@
   }

   case Expr::CXXUnresolvedConstructExprClass: {
+    NotPrimaryExpr();
     const CXXUnresolvedConstructExpr *CE = cast<CXXUnresolvedConstructExpr>(E);
     unsigned N = CE->getNumArgs();
@@ -4225,7 +4294,7 @@
       mangleType(CE->getType());
       mangleInitListElements(IL);
       Out << "E";
-      return;
+      break;
     }

     Out << "cv";
@@ -4237,14 +4306,17 @@
   }

   case Expr::CXXConstructExprClass: {
+    // An implicit cast is silent, thus may contain <expr-primary>.
     const auto *CE = cast<CXXConstructExpr>(E);
     if (!CE->isListInitialization() || CE->isStdInitListInitialization()) {
       assert(
           CE->getNumArgs() >= 1 &&
           (CE->getNumArgs() == 1 || isa<CXXDefaultArgExpr>(CE->getArg(1))) &&
           "implicit CXXConstructExpr must have one argument");
-      return mangleExpression(cast<CXXConstructExpr>(E)->getArg(0));
+      E = cast<CXXConstructExpr>(E)->getArg(0);
+      goto recurse;
     }
+    NotPrimaryExpr();
     Out << "il";
     for (auto *E : CE->arguments())
       mangleExpression(E);
@@ -4253,6 +4325,7 @@
   }

   case Expr::CXXTemporaryObjectExprClass: {
+    NotPrimaryExpr();
     const auto *CE = cast<CXXTemporaryObjectExpr>(E);
     unsigned N = CE->getNumArgs();
     bool List = CE->isListInitialization();
@@ -4282,17 +4355,20 @@
   }

   case Expr::CXXScalarValueInitExprClass:
+    NotPrimaryExpr();
     Out << "cv";
     mangleType(E->getType());
     Out << "_E";
     break;

   case Expr::CXXNoexceptExprClass:
+    NotPrimaryExpr();
     Out << "nx";
     mangleExpression(cast<CXXNoexceptExpr>(E)->getOperand());
     break;

   case Expr::UnaryExprOrTypeTraitExprClass: {
+    // Non-instantiation-dependent traits are an <expr-primary> integer literal.
     const UnaryExprOrTypeTraitExpr *SAE = cast<UnaryExprOrTypeTraitExpr>(E);

     if (!SAE->isInstantiationDependent()) {
@@ -4312,13 +4388,41 @@
       break;
     }

+    NotPrimaryExpr(); // But otherwise, they are not.
+
+    auto MangleAlignofSizeofArg = [&] {
+      if (SAE->isArgumentType()) {
+        Out << 't';
+        mangleType(SAE->getArgumentType());
+      } else {
+        Out << 'z';
+        mangleExpression(SAE->getArgumentExpr());
+      }
+    };
+
     switch(SAE->getKind()) {
     case UETT_SizeOf:
       Out << 's';
+      MangleAlignofSizeofArg();
       break;
     case UETT_PreferredAlignOf:
+      // As of clang 12, we mangle __alignof__ differently than alignof. (They
+      // have acted differently since Clang 8, but were previously mangled the
+      // same.)
+      if (Context.getASTContext().getLangOpts().getClangABICompat() >
+          LangOptions::ClangABI::Ver11) {
+        Out << "u11__alignof__";
+        if (SAE->isArgumentType())
+          mangleType(SAE->getArgumentType());
+        else
+          mangleTemplateArgExpr(SAE->getArgumentExpr());
+        Out << 'E';
+        break;
+      }
+      LLVM_FALLTHROUGH;
     case UETT_AlignOf:
       Out << 'a';
+      MangleAlignofSizeofArg();
       break;
     case UETT_VecStep: {
       DiagnosticsEngine &Diags = Context.getDiags();
@@ -4336,17 +4440,11 @@
       return;
     }
     }
-    if (SAE->isArgumentType()) {
-      Out << 't';
-      mangleType(SAE->getArgumentType());
-    } else {
-      Out << 'z';
-      mangleExpression(SAE->getArgumentExpr());
-    }
     break;
   }

   case Expr::CXXThrowExprClass: {
+    NotPrimaryExpr();
     const CXXThrowExpr *TE = cast<CXXThrowExpr>(E);
     //  <expression> ::= tw <expression>  # throw expression
     //               ::= tr               # rethrow
@@ -4360,6 +4458,7 @@
   }

   case Expr::CXXTypeidExprClass: {
+    NotPrimaryExpr();
     const CXXTypeidExpr *TIE = cast<CXXTypeidExpr>(E);
     //  <expression> ::= ti <type>        # typeid (type)
     //               ::= te <expression>  # typeid (expression)
@@ -4374,6 +4473,7 @@
   }

   case Expr::CXXDeleteExprClass: {
+    NotPrimaryExpr();
     const CXXDeleteExpr *DE = cast<CXXDeleteExpr>(E);
     //  <expression> ::= [gs] dl <expression>  # [::] delete expr
     //               ::= [gs] da <expression>  # [::] delete [] expr
@@ -4384,6 +4484,7 @@
   }

   case Expr::UnaryOperatorClass: {
+    NotPrimaryExpr();
     const UnaryOperator *UO = cast<UnaryOperator>(E);
     mangleOperatorName(UnaryOperator::getOverloadedOperator(UO->getOpcode()),
                        /*Arity=*/1);
@@ -4392,6 +4493,7 @@
   }

   case Expr::ArraySubscriptExprClass: {
+    NotPrimaryExpr();
     const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(E);

     // Array subscript is treated as a syntactically weird form of
@@ -4403,6 +4505,7 @@
   }

   case Expr::MatrixSubscriptExprClass: {
+    NotPrimaryExpr();
     const MatrixSubscriptExpr *ME = cast<MatrixSubscriptExpr>(E);
     Out << "ixix";
     mangleExpression(ME->getBase());
@@ -4413,6 +4516,7 @@

   case Expr::CompoundAssignOperatorClass: // fallthrough
   case Expr::BinaryOperatorClass: {
+    NotPrimaryExpr();
     const BinaryOperator *BO = cast<BinaryOperator>(E);
     if (BO->getOpcode() == BO_PtrMemD)
       Out << "ds";
@@ -4425,6 +4529,7 @@
   }

   case Expr::CXXRewrittenBinaryOperatorClass: {
+    NotPrimaryExpr();
     // The mangled form represents the original syntax.
     CXXRewrittenBinaryOperator::DecomposedForm Decomposed =
         cast<CXXRewrittenBinaryOperator>(E)->getDecomposedForm();
@@ -4436,6 +4541,7 @@
   }

   case Expr::ConditionalOperatorClass: {
+    NotPrimaryExpr();
     const ConditionalOperator *CO = cast<ConditionalOperator>(E);
     mangleOperatorName(OO_Conditional, /*Arity=*/3);
     mangleExpression(CO->getCond());
@@ -4451,19 +4557,22 @@
   }

   case Expr::ObjCBridgedCastExprClass: {
+    NotPrimaryExpr();
     // Mangle ownership casts as a vendor extended operator __bridge,
     // __bridge_transfer, or __bridge_retain.
     StringRef Kind = cast<ObjCBridgedCastExpr>(E)->getBridgeKindName();
     Out << "v1U" << Kind.size() << Kind;
+    mangleCastExpression(E, "cv");
+    break;
   }
-  // Fall through to mangle the cast itself.
-  LLVM_FALLTHROUGH;

   case Expr::CStyleCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "cv");
     break;

   case Expr::CXXFunctionalCastExprClass: {
+    NotPrimaryExpr();
     auto *Sub = cast<ExplicitCastExpr>(E)->getSubExpr()->IgnoreImplicit();
     // FIXME: Add isImplicit to CXXConstructExpr.
     if (auto *CCE = dyn_cast<CXXConstructExpr>(Sub))
@@ -4483,22 +4592,28 @@
   }

   case Expr::CXXStaticCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "sc");
     break;
   case Expr::CXXDynamicCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "dc");
     break;
   case Expr::CXXReinterpretCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "rc");
     break;
   case Expr::CXXConstCastExprClass:
+    NotPrimaryExpr();
    mangleCastExpression(E, "cc");
     break;
   case Expr::CXXAddrspaceCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "ac");
     break;

   case Expr::CXXOperatorCallExprClass: {
+    NotPrimaryExpr();
     const CXXOperatorCallExpr *CE = cast<CXXOperatorCallExpr>(E);
     unsigned NumArgs = CE->getNumArgs();
     // A CXXOperatorCallExpr for OO_Arrow models only semantics, not syntax
@@ -4512,9 +4627,8 @@
   }

   case Expr::ParenExprClass:
-    mangleExpression(cast<ParenExpr>(E)->getSubExpr(), Arity);
-    break;
-
+    E = cast<ParenExpr>(E)->getSubExpr();
+    goto recurse;

   case Expr::ConceptSpecializationExprClass: {
     //  <expr-primary> ::= L <mangled-name> E # external name
@@ -4528,10 +4642,12 @@
   }

   case Expr::DeclRefExprClass:
-    mangleDeclRefExpr(cast<DeclRefExpr>(E)->getDecl());
+    // MangleDeclRefExpr helper handles primary-vs-nonprimary
+    MangleDeclRefExpr(cast<DeclRefExpr>(E)->getDecl());
     break;

   case Expr::SubstNonTypeTemplateParmPackExprClass:
+    NotPrimaryExpr();
     // FIXME: not clear how to mangle this!
     // template <unsigned N...> class A {
     //   template <class U...> void foo(U (&x)[N]...);
@@ -4540,14 +4656,16 @@
     break;

   case Expr::FunctionParmPackExprClass: {
+    NotPrimaryExpr();
     // FIXME: not clear how to mangle this!
     const FunctionParmPackExpr *FPPE = cast<FunctionParmPackExpr>(E);
     Out << "v110_SUBSTPACK";
-    mangleDeclRefExpr(FPPE->getParameterPack());
+    MangleDeclRefExpr(FPPE->getParameterPack());
     break;
   }

   case Expr::DependentScopeDeclRefExprClass: {
+    NotPrimaryExpr();
     const DependentScopeDeclRefExpr *DRE = cast<DependentScopeDeclRefExpr>(E);
     mangleUnresolvedName(DRE->getQualifier(), DRE->getDeclName(),
                          DRE->getTemplateArgs(), DRE->getNumTemplateArgs(),
@@ -4556,24 +4674,27 @@
   }

   case Expr::CXXBindTemporaryExprClass:
-    mangleExpression(cast<CXXBindTemporaryExpr>(E)->getSubExpr());
-    break;
+    E = cast<CXXBindTemporaryExpr>(E)->getSubExpr();
+    goto recurse;

   case Expr::ExprWithCleanupsClass:
-    mangleExpression(cast<ExprWithCleanups>(E)->getSubExpr(), Arity);
-    break;
+    E = cast<ExprWithCleanups>(E)->getSubExpr();
+    goto recurse;

   case Expr::FloatingLiteralClass: {
+    // <expr-primary>
     const FloatingLiteral *FL = cast<FloatingLiteral>(E);
     mangleFloatLiteral(FL->getType(), FL->getValue());
     break;
   }

   case Expr::FixedPointLiteralClass:
+    // Currently unimplemented -- might be <expr-primary> in future?
     mangleFixedPointLiteral();
     break;

   case Expr::CharacterLiteralClass:
+    // <expr-primary>
     Out << 'L';
     mangleType(E->getType());
     Out << cast<CharacterLiteral>(E)->getValue();
@@ -4582,18 +4703,21 @@

   // FIXME. __objc_yes/__objc_no are mangled same as true/false
   case Expr::ObjCBoolLiteralExprClass:
+    // <expr-primary>
     Out << "Lb";
     Out << (cast<ObjCBoolLiteralExpr>(E)->getValue() ? '1' : '0');
     Out << 'E';
     break;

   case Expr::CXXBoolLiteralExprClass:
+    // <expr-primary>
     Out << "Lb";
     Out << (cast<CXXBoolLiteralExpr>(E)->getValue() ? '1' : '0');
     Out << 'E';
     break;

   case Expr::IntegerLiteralClass: {
+    // <expr-primary>
     llvm::APSInt Value(cast<IntegerLiteral>(E)->getValue());
     if (E->getType()->isSignedIntegerType())
       Value.setIsSigned(true);
@@ -4602,6 +4726,7 @@
   }

   case Expr::ImaginaryLiteralClass: {
+    // <expr-primary>
     const ImaginaryLiteral *IE = cast<ImaginaryLiteral>(E);
     // Mangle as if a complex literal.
     // Proposal from David Vandevoorde, 2010.06.30.
@@ -4625,6 +4750,7 @@
   }

   case Expr::StringLiteralClass: {
+    // <expr-primary>
     // Revised proposal from David Vandervoorde, 2010.07.15.
     Out << 'L';
     assert(isa<ConstantArrayType>(E->getType()));
@@ -4634,21 +4760,25 @@
   }

   case Expr::GNUNullExprClass:
+    // <expr-primary>
     // Mangle as if an integer literal 0.
     mangleIntegerLiteral(E->getType(), llvm::APSInt(32));
     break;

   case Expr::CXXNullPtrLiteralExprClass: {
+    // <expr-primary>
     Out << "LDnE";
     break;
   }

   case Expr::PackExpansionExprClass:
+    NotPrimaryExpr();
     Out << "sp";
     mangleExpression(cast<PackExpansionExpr>(E)->getPattern());
     break;

   case Expr::SizeOfPackExprClass: {
+    NotPrimaryExpr();
     auto *SPE = cast<SizeOfPackExpr>(E);
     if (SPE->isPartiallySubstituted()) {
       Out << "sP";
@@ -4673,12 +4803,12 @@
     break;
   }

-  case Expr::MaterializeTemporaryExprClass: {
-    mangleExpression(cast<MaterializeTemporaryExpr>(E)->getSubExpr());
-    break;
-  }
+  case Expr::MaterializeTemporaryExprClass:
+    E = cast<MaterializeTemporaryExpr>(E)->getSubExpr();
+    goto recurse;

   case Expr::CXXFoldExprClass: {
+    NotPrimaryExpr();
     auto *FE = cast<CXXFoldExpr>(E);
     if (FE->isLeftFold())
       Out << (FE->getInit() ? "fL" : "fl");
@@ -4700,27 +4830,34 @@
   }

   case Expr::CXXThisExprClass:
+    NotPrimaryExpr();
     Out << "fpT";
     break;

   case Expr::CoawaitExprClass:
     // FIXME: Propose a non-vendor mangling.
+    NotPrimaryExpr();
     Out << "v18co_await";
     mangleExpression(cast<CoawaitExpr>(E)->getOperand());
     break;

   case Expr::DependentCoawaitExprClass:
     // FIXME: Propose a non-vendor mangling.
+    NotPrimaryExpr();
     Out << "v18co_await";
     mangleExpression(cast<DependentCoawaitExpr>(E)->getOperand());
     break;

   case Expr::CoyieldExprClass:
     // FIXME: Propose a non-vendor mangling.
+    NotPrimaryExpr();
     Out << "v18co_yield";
     mangleExpression(cast<CoawaitExpr>(E)->getOperand());
     break;
   }
+
+  if (AsTemplateArg && !IsPrimaryExpr)
+    Out << 'E';
 }

 /// Mangle an expression which refers to a parameter variable.
@@ -4970,26 +5107,9 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
     Out << "Dp";
     mangleType(A.getAsTemplateOrTemplatePattern());
     break;
-  case TemplateArgument::Expression: {
-    // It's possible to end up with a DeclRefExpr here in certain
-    // dependent cases, in which case we should mangle as a
-    // declaration.
-    const Expr *E = A.getAsExpr()->IgnoreParenImpCasts();
-    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
-      const ValueDecl *D = DRE->getDecl();
-      if (isa<VarDecl>(D) || isa<FunctionDecl>(D)) {
-        Out << 'L';
-        mangle(D);
-        Out << 'E';
-        break;
-      }
-    }
-
-    Out << 'X';
-    mangleExpression(E);
-    Out << 'E';
+  case TemplateArgument::Expression:
+    mangleTemplateArgExpr(A.getAsExpr());
     break;
-  }
   case TemplateArgument::Integral:
     mangleIntegerLiteral(A.getIntegralType(), A.getAsIntegral());
     break;
@@ -5044,6 +5164,38 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
   }
 }

+void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) {
+  ASTContext &Ctx = Context.getASTContext();
+  if (Ctx.getLangOpts().getClangABICompat() > LangOptions::ClangABI::Ver11) {
+    mangleExpression(E, UnknownArity, /*AsTemplateArg=*/true);
+    return;
+  }
+
+  // Prior to Clang 12, we didn't omit the X .. E around <expr-primary>
+  // correctly in cases where the template argument was
+  // constructed from an expression rather than an already-evaluated
+  // literal. In such a case, we would then e.g. emit 'XLi0EE' instead of
+  // 'Li0E'.
+  //
+  // We did special-case DeclRefExpr to attempt to DTRT for that one
+  // expression-kind, but while doing so, unfortunately handled ParmVarDecl
+  // (subtype of VarDecl) _incorrectly_, and emitted 'L_Z .. E' instead of
+  // the proper 'Xfp_E'.
+  E = E->IgnoreParenImpCasts();
+  if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
+    const ValueDecl *D = DRE->getDecl();
+    if (isa<VarDecl>(D) || isa<FunctionDecl>(D)) {
+      Out << 'L';
+      mangle(D);
+      Out << 'E';
+      return;
+    }
+  }
+  Out << 'X';
+  mangleExpression(E);
+  Out << 'E';
+}
+
 /// Determine whether a given value is equivalent to zero-initialization for
 /// the purpose of discarding a trailing portion of a 'tl' mangling.
 ///
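Annotation (mine): the observable effect of the UETT_PreferredAlignOf hunk above, with mangled names abbreviated; this follows directly from the `Out << ...` strings in the patch:

```cpp
// Under the Clang 12 ABI (unless -fclang-abi-compat=11 or lower is given),
// __alignof__ no longer mangles like alignof in dependent positions:
template <class T> void f(char (*)[alignof(T)]);      // standard form: "at" <type>
template <class T> void g(char (*)[__alignof__(T)]);  // vendor form: "u11__alignof__" <type> "E"
```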
diff --git a/clang/lib/AST/MicrosoftCXXABI.cpp b/clang/lib/AST/MicrosoftCXXABI.cpp
index f9f9fe985b6f..166aa3b3bd60 100644
--- a/clang/lib/AST/MicrosoftCXXABI.cpp
+++ b/clang/lib/AST/MicrosoftCXXABI.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/Mangle.h"
 #include "clang/AST/MangleNumberingContext.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/Type.h"
@@ -64,6 +65,19 @@ public:
   }
 };

+class MSHIPNumberingContext : public MicrosoftNumberingContext {
+  std::unique_ptr<MangleNumberingContext> DeviceCtx;
+
+public:
+  MSHIPNumberingContext(MangleContext *DeviceMangler) {
+    DeviceCtx = createItaniumNumberingContext(DeviceMangler);
+  }
+
+  unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
+    return DeviceCtx->getManglingNumber(CallOperator);
+  }
+};
+
 class MicrosoftCXXABI : public CXXABI {
   ASTContext &Context;
   llvm::SmallDenseMap<CXXRecordDecl *, CXXConstructorDecl *> RecordToCopyCtor;
@@ -73,8 +87,20 @@ class MicrosoftCXXABI : public CXXABI {
   llvm::SmallDenseMap<TagDecl *, TypedefNameDecl *>
       UnnamedTagDeclToTypedefNameDecl;

+  // MangleContext for device numbering context, which is based on Itanium C++
+  // ABI.
+  std::unique_ptr<MangleContext> DeviceMangler;
+
 public:
-  MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) { }
+  MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) {
+    if (Context.getLangOpts().CUDA && Context.getAuxTargetInfo()) {
+      assert(Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+             Context.getAuxTargetInfo()->getCXXABI().isItaniumFamily() &&
+             "Unexpected combination of C++ ABIs.");
+      DeviceMangler.reset(
+          Context.createMangleContext(Context.getAuxTargetInfo()));
+    }
+  }

   MemberPointerInfo
   getMemberPointerInfo(const MemberPointerType *MPT) const override;
@@ -133,6 +159,10 @@ public:

   std::unique_ptr<MangleNumberingContext>
   createMangleNumberingContext() const override {
+    if (Context.getLangOpts().CUDA && Context.getAuxTargetInfo()) {
+      assert(DeviceMangler && "Missing device mangler");
+      return std::make_unique<MSHIPNumberingContext>(DeviceMangler.get());
+    }
     return std::make_unique<MicrosoftNumberingContext>();
   }
 };
@@ -266,4 +296,3 @@ CXXABI::MemberPointerInfo MicrosoftCXXABI::getMemberPointerInfo(
 CXXABI *clang::CreateMicrosoftCXXABI(ASTContext &Ctx) {
   return new MicrosoftCXXABI(Ctx);
 }
-
diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 8ddd3c87e09d..69957a952d17 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -243,10 +243,14 @@ public:
       return true;
     ScopedIncrement ScopedDepth(&CurrentDepth);
     if (auto *Init = Node->getInit())
-      if (!match(*Init))
+      if (!traverse(*Init))
         return false;
-    if (!match(*Node->getLoopVariable()) || !match(*Node->getRangeInit()) ||
-        !match(*Node->getBody()))
+    if (!match(*Node->getLoopVariable()))
+      return false;
+    if (match(*Node->getRangeInit()))
+      if (!VisitorBase::TraverseStmt(Node->getRangeInit()))
+        return false;
+    if (!match(*Node->getBody()))
       return false;
     return VisitorBase::TraverseStmt(Node->getBody());
   }
@@ -291,7 +295,7 @@ public:
     if (!match(*Node->getBody()))
       return false;

-    return true;
+    return VisitorBase::TraverseStmt(Node->getBody());
   }

   bool shouldVisitTemplateInstantiations() const { return true; }
@@ -488,15 +492,21 @@ public:

   bool dataTraverseNode(Stmt *S, DataRecursionQueue *Queue) {
     if (auto *RF = dyn_cast<CXXForRangeStmt>(S)) {
-      for (auto *SubStmt : RF->children()) {
-        if (SubStmt == RF->getInit() || SubStmt == RF->getLoopVarStmt() ||
-            SubStmt == RF->getRangeInit() || SubStmt == RF->getBody()) {
-          TraverseStmt(SubStmt, Queue);
-        } else {
-          ASTNodeNotSpelledInSourceScope RAII(this, true);
-          TraverseStmt(SubStmt, Queue);
+      {
+        ASTNodeNotAsIsSourceScope RAII(this, true);
+        TraverseStmt(RF->getInit());
+        // Don't traverse under the loop variable
+        match(*RF->getLoopVariable());
+        TraverseStmt(RF->getRangeInit());
+      }
+      {
+        ASTNodeNotSpelledInSourceScope RAII(this, true);
+        for (auto *SubStmt : RF->children()) {
+          if (SubStmt != RF->getBody())
+            TraverseStmt(SubStmt);
         }
       }
+      TraverseStmt(RF->getBody());
       return true;
     } else if (auto *RBO = dyn_cast<CXXRewrittenBinaryOperator>(S)) {
       {
@@ -556,9 +566,9 @@ public:
         if (LE->hasExplicitResultType())
           TraverseTypeLoc(Proto.getReturnLoc());
         TraverseStmt(LE->getTrailingRequiresClause());
-
-        TraverseStmt(LE->getBody());
       }
+
+      TraverseStmt(LE->getBody());
       return true;
     }
     return RecursiveASTVisitor<MatchASTVisitor>::dataTraverseNode(S, Queue);
@@ -697,6 +707,10 @@ public:
   bool shouldVisitTemplateInstantiations() const { return true; }
   bool shouldVisitImplicitCode() const { return true; }

+  // We visit the lambda body explicitly, so instruct the RAV
+  // to not visit it on our behalf too.
+  bool shouldVisitLambdaBody() const { return false; }
+
   bool IsMatchingInASTNodeNotSpelledInSource() const override {
     return TraversingASTNodeNotSpelledInSource;
   }
@@ -823,6 +837,14 @@ private:
     if (EnableCheckProfiling)
       Timer.setBucket(&TimeByBucket[MP.second->getID()]);
     BoundNodesTreeBuilder Builder;
+
+    {
+      TraversalKindScope RAII(getASTContext(), MP.first.getTraversalKind());
+      if (getASTContext().getParentMapContext().traverseIgnored(DynNode) !=
+          DynNode)
+        continue;
+    }
+
     if (MP.first.matches(DynNode, this, &Builder)) {
       MatchVisitor Visitor(ActiveASTContext, MP.second);
       Builder.visitMatches(&Visitor);
diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index 6c7e14e3499a..705f1cdf3153 100644
--- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -732,7 +732,7 @@ const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasDecl> typeAliasDecl;
 const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasTemplateDecl>
     typeAliasTemplateDecl;
 const internal::VariadicAllOfMatcher<Decl> decl;
-const internal::VariadicAllOfMatcher<DecompositionDecl> decompositionDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, DecompositionDecl>
+    decompositionDecl;
 const internal::VariadicDynCastAllOfMatcher<Decl, LinkageSpecDecl>
     linkageSpecDecl;
 const internal::VariadicDynCastAllOfMatcher<Decl, NamedDecl> namedDecl;
diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp
index 56bc37a79301..2cb05c1c3c07 100644
--- a/clang/lib/Basic/ProfileList.cpp
+++ b/clang/lib/Basic/ProfileList.cpp
@@ -82,6 +82,7 @@ static StringRef getSectionName(CodeGenOptions::ProfileInstrKind Kind) {
   case CodeGenOptions::ProfileCSIRInstr:
     return "csllvm";
   }
+  llvm_unreachable("Unhandled CodeGenOptions::ProfileInstrKind enum");
 }

 llvm::Optional<bool>
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index cfede6e6e756..ff09c0fa2a23 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -318,9 +318,6 @@ bool PPCTargetInfo::initFeatureMap(
                         .Case("pwr9", true)
                         .Case("pwr8", true)
                         .Default(false);
-  Features["float128"] = llvm::StringSwitch<bool>(CPU)
-                             .Case("pwr9", true)
-                             .Default(false);

   Features["spe"] = llvm::StringSwitch<bool>(CPU)
                         .Case("8548", true)
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 0bf02e605740..786201ea340d 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -150,7 +150,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
   }

   if (HasV) {
-    Builder.defineMacro("__riscv_v", "1000000");
+    Builder.defineMacro("__riscv_v", "10000");
     Builder.defineMacro("__riscv_vector");
   }

@@ -191,10 +191,10 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__riscv_zfh", "1000");

   if (HasZvamo)
-    Builder.defineMacro("__riscv_zvamo", "1000000");
+    Builder.defineMacro("__riscv_zvamo", "10000");

   if (HasZvlsseg)
-    Builder.defineMacro("__riscv_zvlsseg", "1000000");
+    Builder.defineMacro("__riscv_zvlsseg", "10000");
 }

 /// Return true if has this feature, need to sync with handleTargetFeatures.
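Annotation (mine): the RISC-V macro changes track the draft V extension (and its zvamo/zvlsseg sub-extensions) being advertised as version 0.10 instead of 1.0, matching the driver change below. My reading of the riscv-c-api encoding is major * 1000000 + minor * 1000, so 0.10 yields 10000; treat the exact convention as an assumption. A hypothetical guard:

```c
#if defined(__riscv_v) && __riscv_v == 10000
/* code written against the 0.10 draft of the vector extension */
#endif
```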
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 113541bd5024..10e3820d9657 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13794,12 +13794,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_reduce_fadd_ps512: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+    Builder.getFastMathFlags().setAllowReassoc(true);
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
   case X86::BI__builtin_ia32_reduce_fmul_pd512:
   case X86::BI__builtin_ia32_reduce_fmul_ps512: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+    Builder.getFastMathFlags().setAllowReassoc(true);
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
   case X86::BI__builtin_ia32_reduce_mul_d512:
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 33a2d6f4483e..e03631a7243a 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -184,6 +184,14 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
   CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
   VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
   VoidPtrPtrTy = VoidPtrTy->getPointerTo();
+  if (CGM.getContext().getAuxTargetInfo()) {
+    // If the host and device have different C++ ABIs, mark it as the device
+    // mangle context so that the mangling needs to retrieve the additonal
+    // device lambda mangling number instead of the regular host one.
+    DeviceMC->setDeviceMangleContext(
+        CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
+        CGM.getContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily());
+  }
 }

 llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index ffae47e5672e..c7f2a3ea5e02 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -63,7 +63,7 @@ isExperimentalExtension(StringRef Ext) {
       Ext == "zbr" || Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc")
     return RISCVExtensionVersion{"0", "93"};
   if (Ext == "v" || Ext == "zvamo" || Ext == "zvlsseg")
-    return RISCVExtensionVersion{"1", "0"};
+    return RISCVExtensionVersion{"0", "10"};
   if (Ext == "zfh")
     return RISCVExtensionVersion{"0", "1"};
   return None;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fdb8a58cd1b3..f8e637974662 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4669,20 +4669,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }

-  if (Triple.isOSAIX() && Args.hasArg(options::OPT_maltivec)) {
-    if (Args.getLastArg(options::OPT_mabi_EQ_vec_extabi)) {
-      CmdArgs.push_back("-mabi=vec-extabi");
-    } else {
-      D.Diag(diag::err_aix_default_altivec_abi);
-    }
-  }
-
   if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ_vec_extabi,
                                options::OPT_mabi_EQ_vec_default)) {
     if (!Triple.isOSAIX())
       D.Diag(diag::err_drv_unsupported_opt_for_target)
           << A->getSpelling() << RawTriple.str();
-    if (A->getOption().getID() == options::OPT_mabi_EQ_vec_default)
+    if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi)
+      CmdArgs.push_back("-mabi=vec-extabi");
+    else
       D.Diag(diag::err_aix_default_altivec_abi);
   }
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6a95aa5ec628..bcaea71dca94 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -605,6 +605,11 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
     CmdArgs.push_back("-plugin-opt=new-pass-manager");
   }

+  // Pass an option to enable pseudo probe emission.
+  if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
+                   options::OPT_fno_pseudo_probe_for_profiling, false))
+    CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling");
+
   // Setup statistics file output.
   SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
   if (!StatsFile.empty())
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index e17a6bd4bdd2..9663a7390ada 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -236,15 +236,6 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     ExtraOpts.push_back("relro");
   }

-  if (Triple.isAndroid() && Triple.isAndroidVersionLT(29)) {
-    // https://github.com/android/ndk/issues/1196
-    // The unwinder used by the crash handler on versions of Android prior to
-    // API 29 did not correctly handle binaries built with rosegment, which is
-    // enabled by default for LLD. Android only supports LLD, so it's not an
-    // issue that this flag is not accepted by other linkers.
-    ExtraOpts.push_back("--no-rosegment");
-  }
-
   // Android ARM/AArch64 use max-page-size=4096 to reduce VMA usage. Note, lld
   // from 11 onwards default max-page-size to 65536 for both ARM and AArch64.
   if ((Triple.isARM() || Triple.isAArch64()) && Triple.isAndroid()) {
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index d1138bbc9c36..5dd0ccdfa6fd 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -371,7 +371,7 @@ private:
       if (Previous->is(tok::comment))
         Previous = Previous->getPreviousNonComment();
       if (Previous) {
-        if (Previous->is(tok::greater))
+        if (Previous->is(tok::greater) && !I[-1]->InPPDirective)
           return 0;
         if (Previous->is(tok::identifier)) {
           const FormatToken *PreviousPrevious =
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index d8be4ea14868..036388ebd355 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2470,6 +2470,8 @@ void CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
   bool IsTargetSpecified =
       Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);

+  Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice;
+
   if (Opts.OpenMP || Opts.OpenMPSimd) {
     if (int Version = getLastArgIntValue(
             Args, OPT_fopenmp_version_EQ,
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 2ee4350b14d4..f226382cbb2c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9297,9 +9297,12 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
 /* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
  * outputs. This class of vector operation forms the basis of many scientific
- * computations. In vector-reduction arithmetic, the evaluation off is
+ * computations. In vector-reduction arithmetic, the evaluation order is
  * independent of the order of the input elements of V.
+ * For floating point types, we always assume the elements are reassociable even
+ * if -fast-math is off.
+
  * Used bisection method. At each step, we partition the vector with previous
  * step in half, and the operation is performed on its two halves.
  * This takes log2(n) steps where n is the number of elements in the vector.
@@ -9345,8 +9348,11 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
   return __builtin_ia32_reduce_or_q512(__W);
 }

+// -0.0 is used to ignore the start value since it is the neutral value of
+// floating point addition. For more information, please refer to
+// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
-  return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
+  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
 }

 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
@@ -9356,7 +9362,7 @@ static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
   __W = _mm512_maskz_mov_pd(__M, __W);
-  return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
+  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
 }

 static __inline__ double __DEFAULT_FN_ATTRS512
@@ -9411,7 +9417,7 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {

 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_add_ps(__m512 __W) {
-  return __builtin_ia32_reduce_fadd_ps512(0.0f, __W);
+  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
 }

 static __inline__ float __DEFAULT_FN_ATTRS512
@@ -9422,7 +9428,7 @@ _mm512_reduce_mul_ps(__m512 __W) {
 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
   __W = _mm512_maskz_mov_ps(__M, __W);
-  return __builtin_ia32_reduce_fadd_ps512(0.0f, __W);
+  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
 }

 static __inline__ float __DEFAULT_FN_ATTRS512
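Annotation (mine): why the start value flips from 0.0 to -0.0 in the reductions above. A tiny self-check:

```cpp
#include <cassert>
#include <cmath>

int main() {
  // -0.0 is the identity of IEEE-754 addition: -0.0 + x == x for every x,
  // including x == -0.0. A +0.0 start value would turn an all-negative-zero
  // reduction into +0.0, flipping the sign of the result.
  assert(std::signbit(-0.0 + -0.0)); // stays -0.0
  assert(!std::signbit(0.0 + -0.0)); // a +0.0 start loses the sign
}
```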
// Microsoft also appears to support this extension, unofficially. - if (!PP.getLangOpts().GNUMode && !PP.getLangOpts().MSVCCompat && - Macro->getNumParams() < 2) + if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode + && Macro->getNumParams() < 2) return false; // Is a comma available to be removed? diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 571164139630..347d992b1643 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4216,7 +4216,7 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, } // Parse _Static_assert declaration. - if (Tok.is(tok::kw__Static_assert)) { + if (Tok.isOneOf(tok::kw__Static_assert, tok::kw_static_assert)) { SourceLocation DeclEnd; ParseStaticAssertDeclaration(DeclEnd); continue; @@ -5180,6 +5180,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_friend: // static_assert-declaration + case tok::kw_static_assert: case tok::kw__Static_assert: // GNU typeof support. diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 55cb3aee6194..450f9c020f7f 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -14,6 +14,7 @@ #include "UsedDeclVisitor.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" +#include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" @@ -537,6 +538,13 @@ void Sema::diagnoseZeroToNullptrConversion(CastKind Kind, const Expr* E) { if (E->IgnoreParenImpCasts()->getType()->isNullPtrType()) return; + // Don't diagnose the conversion from a 0 literal to a null pointer argument + // in a synthesized call to operator<=>. + if (!CodeSynthesisContexts.empty() && + CodeSynthesisContexts.back().Kind == + CodeSynthesisContext::RewritingOperatorAsSpaceship) + return; + // If it is a macro from system header, and if the macro name is not "NULL", // do not warn. SourceLocation MaybeMacroLoc = E->getBeginLoc(); @@ -1733,11 +1741,12 @@ Sema::SemaDiagnosticBuilder::~SemaDiagnosticBuilder() { } } -Sema::SemaDiagnosticBuilder Sema::targetDiag(SourceLocation Loc, - unsigned DiagID) { +Sema::SemaDiagnosticBuilder +Sema::targetDiag(SourceLocation Loc, unsigned DiagID, FunctionDecl *FD) { + FD = FD ? FD : getCurFunctionDecl(); if (LangOpts.OpenMP) - return LangOpts.OpenMPIsDevice ? diagIfOpenMPDeviceCode(Loc, DiagID) - : diagIfOpenMPHostCode(Loc, DiagID); + return LangOpts.OpenMPIsDevice ? diagIfOpenMPDeviceCode(Loc, DiagID, FD) + : diagIfOpenMPHostCode(Loc, DiagID, FD); if (getLangOpts().CUDA) return getLangOpts().CUDAIsDevice ? CUDADiagIfDeviceCode(Loc, DiagID) : CUDADiagIfHostCode(Loc, DiagID); @@ -1746,7 +1755,7 @@ Sema::SemaDiagnosticBuilder Sema::targetDiag(SourceLocation Loc, return SYCLDiagIfDeviceCode(Loc, DiagID); return SemaDiagnosticBuilder(SemaDiagnosticBuilder::K_Immediate, Loc, DiagID, - getCurFunctionDecl(), *this); + FD, *this); } Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID, @@ -1765,15 +1774,14 @@ Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID, DiagID, getCurFunctionDecl(), *this); } - SemaDiagnosticBuilder DB = - getLangOpts().CUDAIsDevice - ? CUDADiagIfDeviceCode(Loc, DiagID) - : CUDADiagIfHostCode(Loc, DiagID); + SemaDiagnosticBuilder DB = getLangOpts().CUDAIsDevice + ? 
CUDADiagIfDeviceCode(Loc, DiagID) + : CUDADiagIfHostCode(Loc, DiagID); SetIsLastErrorImmediate(DB.isImmediate()); return DB; } -void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) { +void Sema::checkDeviceDecl(ValueDecl *D, SourceLocation Loc) { if (isUnevaluatedContext()) return; @@ -1791,13 +1799,17 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) { return; } + // Try to associate errors with the lexical context, if that is a function, or + // the value declaration otherwise. + FunctionDecl *FD = + isa<FunctionDecl>(C) ? cast<FunctionDecl>(C) : dyn_cast<FunctionDecl>(D); auto CheckType = [&](QualType Ty) { if (Ty->isDependentType()) return; if (Ty->isExtIntType()) { if (!Context.getTargetInfo().hasExtIntType()) { - targetDiag(Loc, diag::err_device_unsupported_type) + targetDiag(Loc, diag::err_device_unsupported_type, FD) << D << false /*show bit size*/ << 0 /*bitsize*/ << Ty << Context.getTargetInfo().getTriple().str(); } @@ -1810,11 +1822,12 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) { !Context.getTargetInfo().hasFloat128Type()) || (Ty->isIntegerType() && Context.getTypeSize(Ty) == 128 && !Context.getTargetInfo().hasInt128Type())) { - targetDiag(Loc, diag::err_device_unsupported_type) + if (targetDiag(Loc, diag::err_device_unsupported_type, FD) << D << true /*show bit size*/ << static_cast<unsigned>(Context.getTypeSize(Ty)) << Ty - << Context.getTargetInfo().getTriple().str(); - targetDiag(D->getLocation(), diag::note_defined_here) << D; + << Context.getTargetInfo().getTriple().str()) + D->setInvalidDecl(); + targetDiag(D->getLocation(), diag::note_defined_here, FD) << D; } }; @@ -1826,6 +1839,8 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) { CheckType(ParamTy); CheckType(FPTy->getReturnType()); } + if (const auto *FNPTy = dyn_cast<FunctionNoProtoType>(Ty)) + CheckType(FNPTy->getReturnType()); } /// Looks through the macro-expansion chain for the given diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 3ee0c43097d7..1f7ab49ccdd7 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -9420,6 +9420,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } } + if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) + checkDeviceDecl(NewFD, D.getBeginLoc()); + if (!getLangOpts().CPlusPlus) { // Perform semantic checking on the function declaration. if (!NewFD->isInvalidDecl() && NewFD->isMain()) @@ -18329,42 +18332,51 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD, if (FD->isDependentContext()) return FunctionEmissionStatus::TemplateDiscarded; - FunctionEmissionStatus OMPES = FunctionEmissionStatus::Unknown; + // Check whether this function is an externally visible definition. + auto IsEmittedForExternalSymbol = [this, FD]() { + // We have to check the GVA linkage of the function's *definition* -- if we + // only have a declaration, we don't know whether or not the function will + // be emitted, because (say) the definition could include "inline". + FunctionDecl *Def = FD->getDefinition(); + + return Def && !isDiscardableGVALinkage( + getASTContext().GetGVALinkageForFunction(Def)); + }; + if (LangOpts.OpenMPIsDevice) { + // In OpenMP device mode we will not emit host only functions, or functions + // we don't need due to their linkage. 
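+ // A hedged illustration (hypothetical user code, using the pragma form
+ // referenced in the comment below):
+ //   #pragma omp declare target to(f) device_type(host)
+ //   void f();               // DT_Host: discarded for the device
+ //   #pragma omp declare target to(g) device_type(nohost)
+ //   int g() { return 0; }   // externally visible: emitted for the device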
Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - if (DevTy.hasValue()) { + // DevTy may be changed later by + // #pragma omp declare target to(*) device_type(*). + // Therefore DevTy having no value does not imply host. The emission status + // will be checked again at the end of compilation unit with Final = true. + if (DevTy.hasValue()) if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host) - OMPES = FunctionEmissionStatus::OMPDiscarded; - else if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost || - *DevTy == OMPDeclareTargetDeclAttr::DT_Any) { - OMPES = FunctionEmissionStatus::Emitted; - } - } - } else if (LangOpts.OpenMP) { - // In OpenMP 4.5 all the functions are host functions. - if (LangOpts.OpenMP <= 45) { - OMPES = FunctionEmissionStatus::Emitted; - } else { - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); - // In OpenMP 5.0 or above, DevTy may be changed later by - // #pragma omp declare target to(*) device_type(*). Therefore DevTy - // having no value does not imply host. The emission status will be - // checked again at the end of compilation unit. - if (DevTy.hasValue()) { - if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { - OMPES = FunctionEmissionStatus::OMPDiscarded; - } else if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host || - *DevTy == OMPDeclareTargetDeclAttr::DT_Any) - OMPES = FunctionEmissionStatus::Emitted; - } else if (Final) - OMPES = FunctionEmissionStatus::Emitted; - } - } - if (OMPES == FunctionEmissionStatus::OMPDiscarded || - (OMPES == FunctionEmissionStatus::Emitted && !LangOpts.CUDA)) - return OMPES; + return FunctionEmissionStatus::OMPDiscarded; + // If we have an explicit value for the device type, or we are in a target + // declare context, we need to emit all extern and used symbols. + if (isInOpenMPDeclareTargetContext() || DevTy.hasValue()) + if (IsEmittedForExternalSymbol()) + return FunctionEmissionStatus::Emitted; + // Device mode only emits what it must; if it wasn't tagged yet and isn't + // needed, we'll omit it. + if (Final) + return FunctionEmissionStatus::OMPDiscarded; + } else if (LangOpts.OpenMP > 45) { + // In OpenMP host compilation prior to 5.0 everything was an emitted host + // function. In 5.0, no_host was introduced which might cause a function to + // be omitted. + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl()); + if (DevTy.hasValue()) + if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + return FunctionEmissionStatus::OMPDiscarded; + } + + if (Final && LangOpts.OpenMP && !LangOpts.CUDA) + return FunctionEmissionStatus::Emitted; if (LangOpts.CUDA) { // When compiling for device, host functions are never emitted. Similarly, @@ -18378,17 +18390,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD, (T == Sema::CFT_Device || T == Sema::CFT_Global)) return FunctionEmissionStatus::CUDADiscarded; - // Check whether this function is externally visible -- if so, it's - // known-emitted. - // - // We have to check the GVA linkage of the function's *definition* -- if we - // only have a declaration, we don't know whether or not the function will - // be emitted, because (say) the definition could include "inline".
- FunctionDecl *Def = FD->getDefinition(); - - if (Def && - !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def)) - && (!LangOpts.OpenMP || OMPES == FunctionEmissionStatus::Emitted)) + if (IsEmittedForExternalSymbol()) return FunctionEmissionStatus::Emitted; } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 45616dadcbee..ae8508d6c601 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -373,7 +373,7 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs, } if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) { - if (const auto *VD = dyn_cast<ValueDecl>(D)) + if (auto *VD = dyn_cast<ValueDecl>(D)) checkDeviceDecl(VD, Loc); if (!Context.getTargetInfo().isTLSSupported()) diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index af61c82c2002..c1c6a4bf5c68 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -432,15 +432,16 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class, void Sema::handleLambdaNumbering( CXXRecordDecl *Class, CXXMethodDecl *Method, - Optional<std::tuple<unsigned, bool, Decl *>> Mangling) { + Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling) { if (Mangling) { - unsigned ManglingNumber; bool HasKnownInternalLinkage; + unsigned ManglingNumber, DeviceManglingNumber; Decl *ManglingContextDecl; - std::tie(ManglingNumber, HasKnownInternalLinkage, ManglingContextDecl) = - Mangling.getValue(); + std::tie(HasKnownInternalLinkage, ManglingNumber, DeviceManglingNumber, + ManglingContextDecl) = Mangling.getValue(); Class->setLambdaMangling(ManglingNumber, ManglingContextDecl, HasKnownInternalLinkage); + Class->setDeviceLambdaManglingNumber(DeviceManglingNumber); return; } @@ -476,6 +477,7 @@ void Sema::handleLambdaNumbering( unsigned ManglingNumber = MCtx->getManglingNumber(Method); Class->setLambdaMangling(ManglingNumber, ManglingContextDecl, HasKnownInternalLinkage); + Class->setDeviceLambdaManglingNumber(MCtx->getDeviceManglingNumber(Method)); } } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 78707484f588..4063c185388d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1884,8 +1884,7 @@ void Sema::popOpenMPFunctionRegion(const FunctionScopeInfo *OldFSI) { static bool isOpenMPDeviceDelayedContext(Sema &S) { assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice && "Expected OpenMP device compilation."); - return !S.isInOpenMPTargetExecutionDirective() && - !S.isInOpenMPDeclareTargetContext(); + return !S.isInOpenMPTargetExecutionDirective(); } namespace { @@ -1898,11 +1897,11 @@ enum class FunctionEmissionStatus { } // anonymous namespace Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, - unsigned DiagID) { + unsigned DiagID, + FunctionDecl *FD) { assert(LangOpts.OpenMP && LangOpts.OpenMPIsDevice && "Expected OpenMP device compilation."); - FunctionDecl *FD = getCurFunctionDecl(); SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop; if (FD) { FunctionEmissionStatus FES = getEmissionStatus(FD); @@ -1911,6 +1910,13 @@ Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc, Kind = SemaDiagnosticBuilder::K_Immediate; break; case FunctionEmissionStatus::Unknown: + // TODO: We should always delay diagnostics here in case a target + // region is in a function we do not emit. 
However, as the + current diagnostics are associated with the function containing + // the target region and we do not emit that one, we would miss out + // on diagnostics for the target region itself. We need to anchor + // the diagnostics with the newly generated function *or* ensure we + // emit diagnostics associated with the surrounding function. Kind = isOpenMPDeviceDelayedContext(*this) ? SemaDiagnosticBuilder::K_Deferred : SemaDiagnosticBuilder::K_Immediate; @@ -1925,14 +1931,15 @@ } } - return SemaDiagnosticBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this); + return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this); } Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc, - unsigned DiagID) { + unsigned DiagID, + FunctionDecl *FD) { assert(LangOpts.OpenMP && !LangOpts.OpenMPIsDevice && "Expected OpenMP host compilation."); - FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl()); + FunctionEmissionStatus FES = getEmissionStatus(FD); SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop; switch (FES) { case FunctionEmissionStatus::Emitted: @@ -1948,7 +1955,7 @@ break; } - return SemaDiagnosticBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this); + return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this); } static OpenMPDefaultmapClauseKind diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 0a596e50658b..3c68f9458e58 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -12504,10 +12504,11 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) { E->getCaptureDefault()); getDerived().transformedLocalDecl(OldClass, {Class}); - Optional<std::tuple<unsigned, bool, Decl *>> Mangling; + Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling; if (getDerived().ReplacingOriginal()) - Mangling = std::make_tuple(OldClass->getLambdaManglingNumber(), - OldClass->hasKnownLambdaInternalLinkage(), + Mangling = std::make_tuple(OldClass->hasKnownLambdaInternalLinkage(), + OldClass->getLambdaManglingNumber(), + OldClass->getDeviceLambdaManglingNumber(), OldClass->getLambdaContextDecl()); // Build the call operator.
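(For orientation, a hedged sketch, hypothetical code not from this import, of why a second, device-side mangling number is carried through the transform above: host and device compilations may number lambdas independently, so the class records both and they are now serialized together.)

  // Two lambdas in the same mangling context: each gets a host mangling
  // number and, for device compilation, a device mangling number tracked by
  // setDeviceLambdaManglingNumber()/getDeviceLambdaManglingNumber().
  void host_fn() {
    auto l1 = [] {};  // e.g. host #1, device #1 (numbers assumed)
    auto l2 = [] {};  // e.g. host #2, device #2
  }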
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 6bfb9bd783b5..18ab4666a7d8 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1748,6 +1748,7 @@ void ASTDeclReader::ReadCXXDefinitionData( Lambda.NumExplicitCaptures = Record.readInt(); Lambda.HasKnownInternalLinkage = Record.readInt(); Lambda.ManglingNumber = Record.readInt(); + D->setDeviceLambdaManglingNumber(Record.readInt()); Lambda.ContextDecl = readDeclID(); Lambda.Captures = (Capture *)Reader.getContext().Allocate( sizeof(Capture) * Lambda.NumCaptures); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 6bfa7b0e7d6d..40900af6f9e0 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5667,6 +5667,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(Lambda.NumExplicitCaptures); Record->push_back(Lambda.HasKnownInternalLinkage); Record->push_back(Lambda.ManglingNumber); + Record->push_back(D->getDeviceLambdaManglingNumber()); AddDeclRef(D->getLambdaContextDecl()); AddTypeSourceInfo(Lambda.MethodTyInfo); for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 7f7b38d4215b..068fc9829e57 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -226,7 +226,7 @@ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) #define SANITIZER_INTERCEPT_GETPWENT \ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) -#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_FREEBSD || SI_GLIBC || SI_SOLARIS) +#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_GLIBC || SI_SOLARIS) #define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID || SI_SOLARIS #define SANITIZER_INTERCEPT_GETPWENT_R \ (SI_FREEBSD || SI_NETBSD || SI_GLIBC || SI_SOLARIS) diff --git a/libcxx/include/__locale b/libcxx/include/__locale index a2da7d78049f..77e5faab2676 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -21,30 +21,30 @@ #include <locale.h> #if defined(_LIBCPP_MSVCRT_LIKE) # include <cstring> -# include <support/win32/locale_win32.h> +# include <__support/win32/locale_win32.h> #elif defined(__NuttX__) -# include <support/nuttx/xlocale.h> +# include <__support/nuttx/xlocale.h> #elif defined(_AIX) || defined(__MVS__) -# include <support/ibm/xlocale.h> +# include <__support/ibm/xlocale.h> #elif defined(__ANDROID__) -# include <support/android/locale_bionic.h> +# include <__support/android/locale_bionic.h> #elif defined(__sun__) # include <xlocale.h> -# include <support/solaris/xlocale.h> +# include <__support/solaris/xlocale.h> #elif defined(_NEWLIB_VERSION) -# include <support/newlib/xlocale.h> +# include <__support/newlib/xlocale.h> #elif defined(__OpenBSD__) -# include <support/openbsd/xlocale.h> +# include <__support/openbsd/xlocale.h> #elif (defined(__APPLE__) || defined(__FreeBSD__) \ || defined(__EMSCRIPTEN__) || defined(__IBMCPP__)) # include <xlocale.h> #elif defined(__Fuchsia__) -# include <support/fuchsia/xlocale.h> +# include <__support/fuchsia/xlocale.h> #elif defined(__wasi__) // WASI libc uses musl's locales support. 
-# include <support/musl/xlocale.h> +# include <__support/musl/xlocale.h> #elif defined(_LIBCPP_HAS_MUSL_LIBC) -# include <support/musl/xlocale.h> +# include <__support/musl/xlocale.h> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support index 473c9c3bbe49..de572f3ff84d 100644 --- a/libcxx/include/__threading_support +++ b/libcxx/include/__threading_support @@ -17,7 +17,7 @@ #include <errno.h> #ifdef __MVS__ -# include <support/ibm/nanosleep.h> +# include <__support/ibm/nanosleep.h> #endif #ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER diff --git a/libcxx/include/bit b/libcxx/include/bit index fe360179c5ca..f8c37c3d6bbf 100644 --- a/libcxx/include/bit +++ b/libcxx/include/bit @@ -62,7 +62,7 @@ namespace std { #include <__debug> #if defined(__IBMCPP__) -#include "support/ibm/support.h" +#include "__support/ibm/support.h" #endif #if defined(_LIBCPP_COMPILER_MSVC) #include <intrin.h> diff --git a/libcxx/include/limits b/libcxx/include/limits index 6d5d1e1aca75..8f97cd10a8b1 100644 --- a/libcxx/include/limits +++ b/libcxx/include/limits @@ -105,11 +105,11 @@ template<> class numeric_limits<cv long double>; #include <type_traits> #if defined(_LIBCPP_COMPILER_MSVC) -#include "support/win32/limits_msvc_win32.h" +#include "__support/win32/limits_msvc_win32.h" #endif // _LIBCPP_MSVCRT #if defined(__IBMCPP__) -#include "support/ibm/limits.h" +#include "__support/ibm/limits.h" #endif // __IBMCPP__ #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/memory b/libcxx/include/memory index a00916c8c03f..39d0f5bee6a5 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -2647,7 +2647,7 @@ private: _Alloc *__alloc = reinterpret_cast<_Alloc*>(__first); return __alloc; } - _Tp* __get_elem() _NOEXCEPT { + _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT { _CompressedPair *__as_pair = reinterpret_cast<_CompressedPair*>(__blob_); typename _CompressedPair::_Base2* __second = _CompressedPair::__get_second_base(__as_pair); _Tp *__elem = reinterpret_cast<_Tp*>(__second); diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp index 6b73ed771cd1..9ae1fb5199bf 100644 --- a/libcxx/src/atomic.cpp +++ b/libcxx/src/atomic.cpp @@ -19,6 +19,12 @@ #include <linux/futex.h> #include <sys/syscall.h> +// libc++ uses SYS_futex as a universal syscall name. However, on 32 bit architectures +// with a 64 bit time_t, we need to specify SYS_futex_time64. +#if !defined(SYS_futex) && defined(SYS_futex_time64) +# define SYS_futex SYS_futex_time64 +#endif + #else // <- Add other operating systems here // Baseline needs no new headers diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index f109389f68f3..a0209d0ce8cf 100644 --- a/libcxx/src/locale.cpp +++ b/libcxx/src/locale.cpp @@ -29,7 +29,7 @@ #include "cwctype" #include "__sso_allocator" #if defined(_LIBCPP_MSVCRT) || defined(__MINGW32__) -#include "support/win32/locale_win32.h" +#include "__support/win32/locale_win32.h" #elif !defined(__BIONIC__) && !defined(__NuttX__) #include <langinfo.h> #endif diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index f40bb258b9af..6f16fc7abc48 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -901,7 +901,10 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { continue; } - if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) { + // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC + // sections. 
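+ // (Illustrative, assumed: non-SHF_ALLOC debug sections such as .debug_info
+ //  routinely carry R_ABS-style relocations and must not be rejected below.)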
+ if (expr != R_ABS && expr != R_DTPREL && expr != R_GOTPLTREL && + expr != R_RISCV_ADD) { std::string msg = getLocation<ELFT>(offset) + ": has non-ABS relocation " + toString(type) + " against symbol '" + toString(sym) + "'"; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index e0b17ca3e030..ea1403888eba 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -35,12 +35,28 @@ Breaking changes COFF Improvements ----------------- -* ... +* Error out clearly if creating a DLL with too many exported symbols. + (`D86701 <https://reviews.llvm.org/D86701>`_) MinGW Improvements ------------------ -* ... +* Enabled dynamicbase by default. (`D86654 <https://reviews.llvm.org/D86654>`_) + +* Tolerate mismatches between COMDAT section sizes with different amounts of + padding (produced by binutils) by inspecting the aux section definition. + (`D86659 <https://reviews.llvm.org/D86659>`_) + +* Support setting the subsystem version via the subsystem argument. + (`D88804 <https://reviews.llvm.org/D88804>`_) + +* Implemented the GNU -wrap option. + (`D89004 <https://reviews.llvm.org/D89004>`_, + `D91689 <https://reviews.llvm.org/D91689>`_) + +* Handle the ``--demangle`` and ``--no-demangle`` options. + (`D93950 <https://reviews.llvm.org/D93950>`_) + MachO Improvements ------------------ diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 8274213aa839..a78df16ca404 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -160,10 +160,10 @@ typedef enum { LLVMVectorTypeKind, /**< Fixed width SIMD vector type */ LLVMMetadataTypeKind, /**< Metadata */ LLVMX86_MMXTypeKind, /**< X86 MMX */ - LLVMX86_AMXTypeKind, /**< X86 AMX */ LLVMTokenTypeKind, /**< Tokens */ LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */ - LLVMBFloatTypeKind /**< 16 bit brain floating point type */ + LLVMBFloatTypeKind, /**< 16 bit brain floating point type */ + LLVMX86_AMXTypeKind /**< X86 AMX */ } LLVMTypeKind; typedef enum { @@ -270,7 +270,6 @@ typedef enum { LLVMConstantVectorValueKind, LLVMUndefValueValueKind, - LLVMPoisonValueValueKind, LLVMConstantAggregateZeroValueKind, LLVMConstantDataArrayValueKind, LLVMConstantDataVectorValueKind, @@ -283,6 +282,7 @@ typedef enum { LLVMInlineAsmValueKind, LLVMInstructionValueKind, + LLVMPoisonValueValueKind } LLVMValueKind; typedef enum { diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h index 0ef63dc68e1c..c4602d3449c0 100644 --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -45,7 +45,7 @@ public: enum : unsigned { ExprResultIdx = std::numeric_limits<unsigned>::max() }; struct ResultElem { - WeakTrackingVH Assume; + WeakVH Assume; /// contains either ExprResultIdx or the index of the operand bundle /// containing the knowledge. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index c3221aac8eea..40115fbd2f15 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2785,6 +2785,10 @@ public: return false; } + /// Does this target require the clearing of high-order bits in a register + /// passed to the fp16 to fp conversion library function.
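+ /// A hedged sketch of an override on a hypothetical target whose
+ /// fp16-to-fp libcall reads the full argument register:
+ ///   bool MyTargetLowering::shouldKeepZExtForFP16Conv() const {
+ ///     return true; // keep (op & 0xffff) ahead of the libcall
+ ///   }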
+ virtual bool shouldKeepZExtForFP16Conv() const { return false; } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 6bfc02d15379..e5fca98f9271 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -96,7 +96,6 @@ X(InitListExpr) \ X(FoldExpr) \ X(ThrowExpr) \ - X(UUIDOfExpr) \ X(BoolExpr) \ X(StringLiteral) \ X(LambdaExpr) \ @@ -2035,21 +2034,6 @@ public: } }; -// MSVC __uuidof extension, generated by clang in -fms-extensions mode. -class UUIDOfExpr : public Node { - Node *Operand; -public: - UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {} - - template<typename Fn> void match(Fn F) const { F(Operand); } - - void printLeft(OutputStream &S) const override { - S << "__uuidof("; - Operand->print(S); - S << ")"; - } -}; - class BoolExpr : public Node { bool Value; @@ -5013,6 +4997,43 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() { } } return nullptr; + case 'u': { + ++First; + Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr); + if (!Name) + return nullptr; + // Special case legacy __uuidof mangling. The 't' and 'z' appear where the + // standard encoding expects a <template-arg>, and would otherwise be + // interpreted as <type> node 'short' or 'ellipsis'. However, neither + // __uuidof(short) nor __uuidof(...) can actually appear, so there is no + // actual conflict here. + if (Name->getBaseName() == "__uuidof") { + if (numLeft() < 2) + return nullptr; + if (*First == 't') { + ++First; + Node *Ty = getDerived().parseType(); + if (!Ty) + return nullptr; + return make<CallExpr>(Name, makeNodeArray(&Ty, &Ty + 1)); + } + if (*First == 'z') { + ++First; + Node *Ex = getDerived().parseExpr(); + if (!Ex) + return nullptr; + return make<CallExpr>(Name, makeNodeArray(&Ex, &Ex + 1)); + } + } + size_t ExprsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseTemplateArg(); + if (E == nullptr) + return E; + Names.push_back(E); + } + return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin)); + } case '1': case '2': case '3': @@ -5024,21 +5045,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() { case '9': return getDerived().parseUnresolvedName(); } - - if (consumeIf("u8__uuidoft")) { - Node *Ty = getDerived().parseType(); - if (!Ty) - return nullptr; - return make<UUIDOfExpr>(Ty); - } - - if (consumeIf("u8__uuidofz")) { - Node *Ex = getDerived().parseExpr(); - if (!Ex) - return nullptr; - return make<UUIDOfExpr>(Ex); - } - return nullptr; } diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 9d68f3fdde6c..df3a1d568756 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -981,12 +981,16 @@ public: return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0))); } + ConstantInt *getIndex() const { + return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1))); + } + ConstantInt *getAttributes() const { return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2))); } - ConstantInt *getIndex() const { - return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1))); + ConstantInt *getFactor() const { + return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3))); } }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index b2bfc6e6f9e6..21307ed1bd91 ---
a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1298,7 +1298,7 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int // Like the sideeffect intrinsic defined above, this intrinsic is treated by the // optimizer as having opaque side effects so that it won't be removed or moved // out of the block it probes. -def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], +def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; // Intrinsics to support half precision floating point format diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index ab5b09b72ac3..c4056895f68e 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -790,6 +790,9 @@ let TargetPrefix = "riscv" in { defm vsoxei : RISCVIStore; defm vsuxei : RISCVIStore; + def int_riscv_vle1 : RISCVUSLoad; + def int_riscv_vse1 : RISCVUSStore; + defm vamoswap : RISCVAMO; defm vamoadd : RISCVAMO; defm vamoxor : RISCVAMO; @@ -940,8 +943,8 @@ let TargetPrefix = "riscv" in { defm vfwnmsac : RISCVTernaryWide; defm vfsqrt : RISCVUnaryAA; - defm vfrsqrte7 : RISCVUnaryAA; - defm vfrece7 : RISCVUnaryAA; + defm vfrsqrt7 : RISCVUnaryAA; + defm vfrec7 : RISCVUnaryAA; defm vfmin : RISCVBinaryAAX; defm vfmax : RISCVBinaryAAX; diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h index e0370c264102..5165e80caa2d 100644 --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -16,28 +16,39 @@ #include "llvm/ADT/Optional.h" #include <cassert> #include <cstdint> +#include <limits> namespace llvm { class Instruction; +class BasicBlock; constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; +// The saturated distribution factor representing 100% for block probes. +constexpr static uint64_t PseudoProbeFullDistributionFactor = + std::numeric_limits<uint64_t>::max(); + struct PseudoProbeDwarfDiscriminator { +public: // The following APIs encode/decode per-probe information to/from a // 32-bit integer which is organized as: // [2:0] - 0x7, this is reserved for regular discriminator, // see DWARF discriminator encoding rule // [18:3] - probe id - // [25:19] - reserved + // [25:19] - probe distribution factor // [28:26] - probe type, see PseudoProbeType // [31:29] - reserved for probe attributes - static uint32_t packProbeData(uint32_t Index, uint32_t Type) { + static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags, + uint32_t Factor) { assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16"); assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7"); - return (Index << 3) | (Type << 26) | 0x7; + assert(Flags <= 0x7); + assert(Factor <= 100 && + "Probe distribution factor too big to encode, exceeding 100"); + return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7; } static uint32_t extractProbeIndex(uint32_t Value) { @@ -51,16 +62,26 @@ struct PseudoProbeDwarfDiscriminator { static uint32_t extractProbeAttributes(uint32_t Value) { return (Value >> 29) & 0x7; } + + static uint32_t extractProbeFactor(uint32_t Value) { + return (Value >> 19) & 0x7F; + } + + // The saturated distribution factor representing 100% for callsites.
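+ // (Worked example, values assumed: packProbeData(/*Index=*/5,
+ //  /*Type=*/0 /*Block*/, /*Flags=*/0, /*Factor=*/100) ==
+ //  (5 << 3) | (100 << 19) | (0 << 26) | 0x7 == 0x320002F; then
+ //  extractProbeIndex(0x320002F) == 5 and extractProbeFactor == 100.)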
+ constexpr static uint8_t FullDistributionFactor = 100; }; struct PseudoProbe { uint32_t Id; uint32_t Type; uint32_t Attr; + float Factor; }; Optional<PseudoProbe> extractProbe(const Instruction &Inst); +void setProbeDistributionFactor(Instruction &Inst, float Factor); + } // end namespace llvm #endif // LLVM_IR_PSEUDOPROBE_H diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 795a980878e2..61c86b0468f2 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -22,6 +22,7 @@ #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/SampleProfileProbe.h" #include <string> #include <utility> @@ -273,6 +274,7 @@ class StandardInstrumentations { OptBisectInstrumentation OptBisect; PreservedCFGCheckerInstrumentation PreservedCFGChecker; IRChangedPrinter PrintChangedIR; + PseudoProbeVerifier PseudoProbeVerification; VerifyInstrumentation Verify; bool VerifyEach; diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index c45ace9e68c1..25d5b2376c11 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -347,6 +347,16 @@ public: return SortedTargets; } + /// Prorate call targets by a distribution factor. + static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, + float DistributionFactor) { + CallTargetMap AdjustedTargets; + for (const auto &I : Targets) { + AdjustedTargets[I.first()] = I.second * DistributionFactor; + } + return AdjustedTargets; + } + /// Merge the samples in \p Other into this record. /// Optionally scale sample counts by \p Weight. sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { @@ -439,9 +449,11 @@ public: void clearState(ContextStateMask S) { State &= (uint32_t)~S; } bool hasContext() const { return State != UnknownContext; } bool isBaseContext() const { return CallingContext.empty(); } - StringRef getName() const { return Name; } + StringRef getNameWithoutContext() const { return Name; } StringRef getCallingContext() const { return CallingContext; } - StringRef getNameWithContext() const { return FullContext; } + StringRef getNameWithContext(bool WithBracket = false) const { + return WithBracket ? InputContext : FullContext; + } private: // Give a context string, decode and populate internal states like @@ -449,6 +461,7 @@ private: // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` void setContext(StringRef ContextStr, ContextStateMask CState) { assert(!ContextStr.empty()); + InputContext = ContextStr; // Note that `[]` wrapped input indicates a full context string, otherwise // it's treated as context-less function name only. bool HasContext = ContextStr.startswith("["); @@ -480,6 +493,9 @@ private: } } + // Input context string including bracketed calling context and leaf function + // name + StringRef InputContext; // Full context string including calling context and leaf function name StringRef FullContext; // Function name for the associated sample profile @@ -676,7 +692,8 @@ public: Name = Other.getName(); if (!GUIDToFuncNameMap) GUIDToFuncNameMap = Other.GUIDToFuncNameMap; - + if (Context.getNameWithContext(true).empty()) + Context = Other.getContext(); if (FunctionHash == 0) { // Set the function hash code for the target profile. 
FunctionHash = Other.getFunctionHash(); @@ -743,8 +760,10 @@ public: StringRef getName() const { return Name; } /// Return function name with context. - StringRef getNameWithContext() const { - return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name; + StringRef getNameWithContext(bool WithBracket = false) const { + return FunctionSamples::ProfileIsCS + ? Context.getNameWithContext(WithBracket) + : Name; } /// Return the original function name. diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 3f52a2f6163b..999e75eddffa 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -488,8 +488,12 @@ protected: /// \brief Whether samples are collected based on pseudo probes. bool ProfileIsProbeBased = false; + /// Whether function profiles are context-sensitive. bool ProfileIsCS = false; + /// Number of context-sensitive profiles. + uint32_t CSProfileCount = 0; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; }; diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 38f3e188be55..0706aa226c0e 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -369,9 +369,22 @@ public: virtual void setDefault() = 0; + // Prints the help string for an option. + // + // This maintains the Indent for multi-line descriptions. + // FirstLineIndentedBy is the count of chars of the first line + // i.e. the one containing the --<option name>. static void printHelpStr(StringRef HelpStr, size_t Indent, size_t FirstLineIndentedBy); + // Prints the help string for an enum value. + // + // This maintains the Indent for multi-line descriptions. + // FirstLineIndentedBy is the count of chars of the first line + // i.e. the one containing the =<value>. + static void printEnumValHelpStr(StringRef HelpStr, size_t Indent, + size_t FirstLineIndentedBy); + virtual void getExtraOptionNames(SmallVectorImpl<StringRef> &) {} // addOccurrence - Wrapper around handleOccurrence that enforces Flags. diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h index 5b2600144fa3..526e141838c4 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -23,6 +23,7 @@ #include "llvm/ProfileData/SampleProf.h" #include <list> #include <map> +#include <vector> using namespace llvm; using namespace sampleprof; @@ -42,7 +43,7 @@ public: CallSiteLoc(CallLoc){}; ContextTrieNode *getChildContext(const LineLocation &CallSite, StringRef CalleeName); - ContextTrieNode *getChildContext(const LineLocation &CallSite); + ContextTrieNode *getHottestChildContext(const LineLocation &CallSite); ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate = true); @@ -94,6 +95,9 @@ public: // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, StringRef CalleeName); + // Get samples for indirect call targets for call site at given location. + std::vector<const FunctionSamples *> + getIndirectCalleeContextSamplesFor(const DILocation *DIL); // Query context profile for a given location. The full context // is identified by input DILocation. 
FunctionSamples *getContextSamplesFor(const DILocation *DIL); diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 78117fd4a9c2..cab893b50d19 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -16,6 +16,10 @@ #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" @@ -29,6 +33,8 @@ class Module; using namespace sampleprof; using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>; using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>; +using ProbeFactorMap = std::unordered_map<uint64_t, float>; +using FuncProbeFactorMap = StringMap<ProbeFactorMap>; enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; @@ -43,6 +49,33 @@ public: uint64_t getFunctionHash() const { return FunctionHash; } }; +// A pseudo probe verifier that can be run after each IR pass to detect +// violations of probe factor updates. In principle, the sum of distribution +// factors for a probe should be identical before and after a pass. For a +// function pass, the factor sum for a probe would typically be 100%. +class PseudoProbeVerifier { +public: + void registerCallbacks(PassInstrumentationCallbacks &PIC); + + // Implementation of pass instrumentation callbacks for new pass manager. + void runAfterPass(StringRef PassID, Any IR); + +private: + // Allow a little bias due to the rounding to integral factors. + constexpr static float DistributionFactorVariance = 0.02; + // Distribution factors from last pass. + FuncProbeFactorMap FunctionProbeFactors; + + void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors); + void runAfterPass(const Module *M); + void runAfterPass(const LazyCallGraph::SCC *C); + void runAfterPass(const Function *F); + void runAfterPass(const Loop *L); + bool shouldVerifyFunction(const Function *F); + void verifyProbeFactors(const Function *F, + const ProbeFactorMap &ProbeFactors); +}; + // This class serves sample counts correlation for SampleProfileLoader by // analyzing pseudo probes and their function descriptors injected by // SampleProfileProber. @@ -102,5 +135,13 @@ public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> { + void runOnFunction(Function &F, FunctionAnalysisManager &FAM); + +public: + PseudoProbeUpdatePass() {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index 52dca7d378e1..4722b68e20e9 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -281,7 +281,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, // clobbers where they don't really exist at all. Please see D43269 for // context.
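// (For instance, assumed example: an @llvm.assume call never writes the
//  queried memory location, so reporting it as a clobber would be a false
//  positive; it is filtered out in the switch below.)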
switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_end: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: @@ -358,22 +357,6 @@ struct UpwardsMemoryQuery { } // end anonymous namespace -static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, - BatchAAResults &AA) { - Instruction *Inst = MD->getMemoryInst(); - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_end: { - MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1)); - return AA.alias(ArgLoc, Loc) == MustAlias; - } - default: - return false; - } - } - return false; -} - template <typename AliasAnalysisType> static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { @@ -1465,15 +1448,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( } MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); - // If the lifetime of the pointer ends at this instruction, it's live on - // entry. - if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { - // Reset UpperBound to liveOnEntryDef's place in the stack - UpperBound = 0; - FoundClobberResult = true; - LocInfo.AR = MustAlias; - break; - } ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); if (CA.IsClobber) { FoundClobberResult = true; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index df0219fcfa64..a9353bdfb780 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -968,10 +968,11 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) return false; - // Check that the next block is the conditional branch target. - if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) - return false; - return true; + // Check that the next block is the conditional branch target. Also make sure + // that it isn't the same as the G_BR's target (otherwise, this will loop.) + MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); + return BrCondTarget != MI.getOperand(0).getMBB() && + MBB->isLayoutSuccessor(BrCondTarget); } void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 615bea2a4905..89670d708264 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21174,7 +21174,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) - if (N0->getOpcode() == ISD::AND) { + if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) { ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); if (AndConst && AndConst->getAPIntValue() == 0xffff) { return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 03cb108cc485..95dd55237e5f 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -630,7 +630,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) { // Undefined behavior invoked - the destination type can't represent // the input constant. 
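// (Assumed example: folding 'fptosi float 1.0e+30 to i8' -- the value is
//  not representable in i8, so the fold below yields undef.)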
- return PoisonValue::get(DestTy); + return UndefValue::get(DestTy); } return ConstantInt::get(FPC->getContext(), IntVal); } @@ -916,7 +916,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, unsigned NumElts = ValTy->getNumElements(); if (CIdx->uge(NumElts)) - return PoisonValue::get(Val->getType()); + return UndefValue::get(Val->getType()); SmallVector<Constant*, 16> Result; Result.reserve(NumElts); @@ -1151,21 +1151,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, } case Instruction::SDiv: case Instruction::UDiv: - // X / undef -> poison - // X / 0 -> poison - if (match(C2, m_CombineOr(m_Undef(), m_Zero()))) - return PoisonValue::get(C2->getType()); + // X / undef -> undef + if (isa<UndefValue>(C2)) + return C2; + // undef / 0 -> undef // undef / 1 -> undef - if (match(C2, m_One())) + if (match(C2, m_Zero()) || match(C2, m_One())) return C1; // undef / X -> 0 otherwise return Constant::getNullValue(C1->getType()); case Instruction::URem: case Instruction::SRem: - // X % undef -> poison - // X % 0 -> poison - if (match(C2, m_CombineOr(m_Undef(), m_Zero()))) - return PoisonValue::get(C2->getType()); + // X % undef -> undef + if (match(C2, m_Undef())) + return C2; + // undef % 0 -> undef + if (match(C2, m_Zero())) + return C1; // undef % X -> 0 otherwise return Constant::getNullValue(C1->getType()); case Instruction::Or: // X | undef -> -1 @@ -1173,28 +1175,28 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, return C1; return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0 case Instruction::LShr: - // X >>l undef -> poison + // X >>l undef -> undef if (isa<UndefValue>(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef >>l 0 -> undef if (match(C2, m_Zero())) return C1; // undef >>l X -> 0 return Constant::getNullValue(C1->getType()); case Instruction::AShr: - // X >>a undef -> poison + // X >>a undef -> undef if (isa<UndefValue>(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef >>a 0 -> undef if (match(C2, m_Zero())) return C1; - // TODO: undef >>a X -> poison if the shift is exact + // TODO: undef >>a X -> undef if the shift is exact // undef >>a X -> 0 return Constant::getNullValue(C1->getType()); case Instruction::Shl: // X << undef -> undef if (isa<UndefValue>(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef << 0 -> undef if (match(C2, m_Zero())) return C1; @@ -1247,14 +1249,14 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, if (CI2->isOne()) return C1; // X / 1 == X if (CI2->isZero()) - return PoisonValue::get(CI2->getType()); // X / 0 == poison + return UndefValue::get(CI2->getType()); // X / 0 == undef break; case Instruction::URem: case Instruction::SRem: if (CI2->isOne()) return Constant::getNullValue(CI2->getType()); // X % 1 == 0 if (CI2->isZero()) - return PoisonValue::get(CI2->getType()); // X % 0 == poison + return UndefValue::get(CI2->getType()); // X % 0 == undef break; case Instruction::And: if (CI2->isZero()) return C2; // X & 0 == 0 @@ -1368,7 +1370,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::SDiv: assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) - return PoisonValue::get(CI1->getType()); // MIN_INT / -1 -> poison + return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: 
assert(!CI2->isZero() && "Div by zero handled above"); @@ -1376,7 +1378,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::SRem: assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) - return PoisonValue::get(CI1->getType()); // MIN_INT % -1 -> poison + return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); case Instruction::And: return ConstantInt::get(CI1->getContext(), C1V & C2V); @@ -1387,15 +1389,15 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::Shl: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.shl(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef case Instruction::LShr: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef case Instruction::AShr: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef } } @@ -1441,7 +1443,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, // Fast path for splatted constants. if (Constant *C2Splat = C2->getSplatValue()) { if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue()) - return PoisonValue::get(VTy); + return UndefValue::get(VTy); if (Constant *C1Splat = C1->getSplatValue()) { return ConstantVector::getSplat( VTy->getElementCount(), @@ -1458,9 +1460,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx); Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx); - // If any element of a divisor vector is zero, the whole op is poison. + // If any element of a divisor vector is zero, the whole op is undef. if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue()) - return PoisonValue::get(VTy); + return UndefValue::get(VTy); Result.push_back(ConstantExpr::get(Opcode, LHS, RHS)); } @@ -2343,8 +2345,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, return PoisonValue::get(GEPTy); if (isa<UndefValue>(C)) - // If inbounds, we can choose an out-of-bounds pointer as a base pointer. - return InBounds ? 
PoisonValue::get(GEPTy) : UndefValue::get(GEPTy); + return UndefValue::get(GEPTy); Constant *Idx0 = cast<Constant>(Idxs[0]); if (Idxs.size() == 1 && (Idx0->isNullValue() || isa<UndefValue>(Idx0))) diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp index 804214f06e7a..80d2963938d4 100644 --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -35,6 +35,9 @@ Optional<PseudoProbe> extractProbeFromDiscriminator(const Instruction &Inst) { PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); Probe.Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator); + Probe.Factor = + PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) / + (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor; return Probe; } } @@ -47,6 +50,8 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) { Probe.Id = II->getIndex()->getZExtValue(); Probe.Type = (uint32_t)PseudoProbeType::Block; Probe.Attr = II->getAttributes()->getZExtValue(); + Probe.Factor = II->getFactor()->getZExtValue() / + (float)PseudoProbeFullDistributionFactor; return Probe; } @@ -55,4 +60,40 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) { return None; } + +void setProbeDistributionFactor(Instruction &Inst, float Factor) { + assert(Factor >= 0 && Factor <= 1 && + "Distribution factor must be in [0, 1.0]"); + if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) { + IRBuilder<> Builder(&Inst); + uint64_t IntFactor = PseudoProbeFullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + auto OrigFactor = II->getFactor()->getZExtValue(); + if (IntFactor != OrigFactor) + II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor)); + } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) { + if (const DebugLoc &DLoc = Inst.getDebugLoc()) { + const DILocation *DIL = DLoc; + auto Discriminator = DIL->getDiscriminator(); + if (DILocation::isPseudoProbeDiscriminator(Discriminator)) { + auto Index = + PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); + auto Type = + PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); + auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes( + Discriminator); + // Round small factors to 0 to avoid over-counting. 
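+ // (Worked example, assumed: Factor = 0.004 gives IntFactor =
+ //  100 * 0.004 -> 0 after integral truncation, so this callsite probe
+ //  stops contributing counts.)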
+ uint32_t IntFactor = + PseudoProbeDwarfDiscriminator::FullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( + Index, Type, Attr, IntFactor); + DIL = DIL->cloneWithDiscriminator(V); + Inst.setDebugLoc(DIL); + } + } + } +} } // namespace llvm diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 100e881c8fa8..6dd299ee9845 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1070,12 +1070,6 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { if (auto *Params = N.getRawTemplateParams()) visitTemplateParams(N, *Params); - if (N.getTag() == dwarf::DW_TAG_class_type || - N.getTag() == dwarf::DW_TAG_union_type) { - AssertDI(N.getFile() && !N.getFile()->getFilename().empty(), - "class/union requires a filename", &N, N.getFile()); - } - if (auto *D = N.getRawDiscriminator()) { AssertDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part, "discriminator can only appear on variant part"); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d4c4c6e01ef5..6c1a7c75d30a 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1423,6 +1423,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1477,6 +1480,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { if (PTO.Coroutines) MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. 
addAnnotationRemarksPass(MPM); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 860bfade733d..877cb9ed13b3 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) +MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) #undef MODULE_PASS #ifndef CGSCC_ANALYSIS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index a8bfe02d4432..6795aed7b04e 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks( OptBisect.registerCallbacks(PIC); PreservedCFGChecker.registerCallbacks(PIC); PrintChangedIR.registerCallbacks(PIC); + PseudoProbeVerification.registerCallbacks(PIC); if (VerifyEach) Verify.registerCallbacks(PIC); } diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index c42931174bc0..370ffc8e2885 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -222,8 +222,6 @@ std::error_code SampleProfileReaderText::readImpl() { sampleprof_error Result = sampleprof_error::success; InlineCallStack InlineStack; - int CSProfileCount = 0; - int RegularProfileCount = 0; uint32_t ProbeProfileCount = 0; // SeenMetadata tracks whether we have processed metadata for the current @@ -257,11 +255,9 @@ std::error_code SampleProfileReaderText::readImpl() { SampleContext FContext(FName); if (FContext.hasContext()) ++CSProfileCount; - else - ++RegularProfileCount; Profiles[FContext] = FunctionSamples(); FunctionSamples &FProfile = Profiles[FContext]; - FProfile.setName(FContext.getName()); + FProfile.setName(FContext.getNameWithoutContext()); FProfile.setContext(FContext); MergeResult(Result, FProfile.addTotalSamples(NumSamples)); MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); @@ -324,13 +320,14 @@ std::error_code SampleProfileReaderText::readImpl() { } } - assert((RegularProfileCount == 0 || CSProfileCount == 0) && + assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && "Cannot have both context-sensitive and regular profile"); ProfileIsCS = (CSProfileCount > 0); assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && "Cannot have both probe-based profiles and regular profiles"); ProfileIsProbeBased = (ProbeProfileCount > 0); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; + FunctionSamples::ProfileIsCS = ProfileIsCS; if (Result == sampleprof_error::success) computeSummary(); @@ -546,12 +543,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { if (std::error_code EC = FName.getError()) return EC; - Profiles[*FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[*FName]; - FProfile.setName(*FName); - + SampleContext FContext(*FName); + Profiles[FContext] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FContext]; + FProfile.setName(FContext.getNameWithoutContext()); + FProfile.setContext(FContext); FProfile.addHeadSamples(*NumHeadSamples); + if (FContext.hasContext()) + CSProfileCount++; + if (std::error_code EC = readProfile(FProfile)) return EC; return sampleprof_error::success; @@ -654,40 +655,44 @@ 
std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { return EC; } assert(Data == End && "More data is read than expected"); - return sampleprof_error::success; - } - - if (Remapper) { - for (auto Name : FuncsToUse) { - Remapper->insert(Name); + } else { + if (Remapper) { + for (auto Name : FuncsToUse) { + Remapper->insert(Name); + } } - } - if (useMD5()) { - for (auto Name : FuncsToUse) { - auto GUID = std::to_string(MD5Hash(Name)); - auto iter = FuncOffsetTable.find(StringRef(GUID)); - if (iter == FuncOffsetTable.end()) - continue; - const uint8_t *FuncProfileAddr = Start + iter->second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } else { - for (auto NameOffset : FuncOffsetTable) { - auto FuncName = NameOffset.first; - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) - continue; - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; + if (useMD5()) { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + } else { + for (auto NameOffset : FuncOffsetTable) { + SampleContext FContext(NameOffset.first); + auto FuncName = FContext.getNameWithoutContext(); + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) + continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } } + Data = End; } - Data = End; + assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && + "Cannot have both context-sensitive and regular profile"); + ProfileIsCS = (CSProfileCount > 0); + FunctionSamples::ProfileIsCS = ProfileIsCS; return sampleprof_error::success; } @@ -878,7 +883,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { if (!ProfileIsProbeBased) return sampleprof_error::success; - for (unsigned I = 0; I < Profiles.size(); ++I) { + while (Data < End) { auto FName(readStringFromTable()); if (std::error_code EC = FName.getError()) return EC; @@ -887,8 +892,14 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { if (std::error_code EC = Checksum.getError()) return EC; - Profiles[*FName].setFunctionHash(*Checksum); + SampleContext FContext(*FName); + // No need to load metadata for profiles that are not loaded in the current + // module. 
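The SampleContext plumbing above changes what the readers use as a map key versus what they hand to setName(): the key keeps the full context string while the profile's name becomes just the leaf function via getNameWithoutContext(). A toy model of that split, assuming the bracketed "[caller:line @ leaf]" spelling of context-sensitive names; the real parsing lives in SampleContext and this helper is made up:

    #include <cassert>
    #include <string>

    // Hypothetical helper: leaf name of a possibly context-qualified
    // profile name, e.g. "[main:3 @ foo]" -> "foo".
    static std::string nameWithoutContext(const std::string &N) {
      if (N.empty() || N.front() != '[')
        return N; // plain name, no calling context attached
      size_t At = N.rfind(" @ ");
      size_t Start = (At == std::string::npos) ? 1 : At + 3;
      return N.substr(Start, N.size() - 1 - Start); // drop trailing ']'
    }

    int main() {
      assert(nameWithoutContext("foo") == "foo");
      assert(nameWithoutContext("[main:3 @ foo]") == "foo");
    }

Keying on the full context is also what makes the new CSProfileCount == Profiles.size() assertion meaningful: a profile file is either entirely context-sensitive or entirely plain, never a mix.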
+ if (Profiles.count(FContext)) + Profiles[FContext].setFunctionHash(*Checksum); } + + assert(Data == End && "More data is read than expected"); return sampleprof_error::success; } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 71dba6281f76..d3bc05e06fdf 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -147,7 +147,7 @@ std::error_code SampleProfileWriterExtBinaryBase::write( std::error_code SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) { uint64_t Offset = OutputStream->tell(); - StringRef Name = S.getName(); + StringRef Name = S.getNameWithContext(true); FuncOffsetTable[Name] = Offset - SecLBRProfileStart; encodeULEB128(S.getHeadSamples(), *OutputStream); return writeBody(S); @@ -635,7 +635,7 @@ std::error_code SampleProfileWriterBinary::writeSummary() { std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { auto &OS = *OutputStream; - if (std::error_code EC = writeNameIdx(S.getName())) + if (std::error_code EC = writeNameIdx(S.getNameWithContext(true))) return EC; encodeULEB128(S.getTotalSamples(), OS); diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 6d89481bf28a..e2f014d1815b 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1726,6 +1726,19 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent, } } +void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent, + size_t FirstLineIndentedBy) { + const StringRef ValHelpPrefix = " "; + assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size()); + std::pair<StringRef, StringRef> Split = HelpStr.split('\n'); + outs().indent(BaseIndent - FirstLineIndentedBy) + << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n"; + while (!Split.second.empty()) { + Split = Split.second.split('\n'); + outs().indent(BaseIndent + ValHelpPrefix.size()) << Split.first << "\n"; + } +} + // Print out the option for the alias. 
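Option::printEnumValHelpStr, added above, exists so that a multi-line =value description wraps to the help column instead of spilling past it. A freestanding sketch of the same indentation arithmetic; the " - " ArgHelpPrefix is assumed from the rest of CommandLine.cpp:

    #include <iostream>
    #include <sstream>
    #include <string>

    static void printEnumValHelp(const std::string &Help, size_t BaseIndent,
                                 size_t FirstLineIndentedBy) {
      const std::string ArgHelpPrefix = " - "; // assumed
      const std::string ValHelpPrefix = "  ";
      std::istringstream SS(Help);
      std::string Line;
      bool First = true;
      while (std::getline(SS, Line)) {
        // First line backs up by what "=value" already printed; later
        // lines indent past the help column by the value prefix.
        size_t Indent = First ? BaseIndent - FirstLineIndentedBy
                              : BaseIndent + ValHelpPrefix.size();
        std::cout << std::string(Indent, ' ')
                  << (First ? ArgHelpPrefix + ValHelpPrefix : "") << Line
                  << '\n';
        First = false;
      }
    }

    int main() { printEnumValHelp("line one\nline two", 24, 10); }

The generic_parser_base::printOptionInfo hunk that follows switches from computing NumSpaces itself to passing GlobalWidth and the first line's printed width down to this helper.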
void alias::printOptionInfo(size_t GlobalWidth) const { outs() << PrintArg(ArgStr); @@ -1971,17 +1984,17 @@ void generic_parser_base::printOptionInfo(const Option &O, StringRef Description = getDescription(i); if (!shouldPrintOption(OptionName, Description, O)) continue; - assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize); - size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize; + size_t FirstLineIndent = OptionName.size() + OptionPrefixesSize; outs() << OptionPrefix << OptionName; if (OptionName.empty()) { outs() << EmptyOption; - assert(NumSpaces >= EmptyOption.size()); - NumSpaces -= EmptyOption.size(); + assert(FirstLineIndent >= EmptyOption.size()); + FirstLineIndent += EmptyOption.size(); } if (!Description.empty()) - outs().indent(NumSpaces) << ArgHelpPrefix << " " << Description; - outs() << '\n'; + Option::printEnumValHelpStr(Description, GlobalWidth, FirstLineIndent); + else + outs() << '\n'; } } else { if (!O.HelpStr.empty()) diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 37c924d879b1..68c721cb0d72 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -111,7 +111,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() { SupportsDebugInformation = true; CodePointerSize = 8; - CommentString = ";"; + CommentString = "//"; ExceptionsType = ExceptionHandling::WinEH; WinEHEncodingType = WinEH::EncodingType::Itanium; } diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 2628070f219c..cdb78aae1c4f 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -75,17 +75,19 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineOperand &MovSrc = Def->getOperand(1); bool ConstantFolded = false; - if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || - isUInt<32>(MovSrc.getImm()))) { - Src0.ChangeToImmediate(MovSrc.getImm()); - ConstantFolded = true; - } else if (MovSrc.isFI()) { - Src0.ChangeToFrameIndex(MovSrc.getIndex()); - ConstantFolded = true; - } else if (MovSrc.isGlobal()) { - Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(), - MovSrc.getTargetFlags()); - ConstantFolded = true; + if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) { + if (MovSrc.isImm() && + (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) { + Src0.ChangeToImmediate(MovSrc.getImm()); + ConstantFolded = true; + } else if (MovSrc.isFI()) { + Src0.ChangeToFrameIndex(MovSrc.getIndex()); + ConstantFolded = true; + } else if (MovSrc.isGlobal()) { + Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(), + MovSrc.getTargetFlags()); + ConstantFolded = true; + } } if (ConstantFolded) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 397979b4ab1e..598062672a56 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -18661,6 +18661,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { : AtomicExpansionKind::None; } +// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32 +// bits, and up to 64 bits on the non-M profiles. 
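The comment just above summarizes the guard added in the next hunk: LL/SC cmpxchg expansion is only sound when ldrex/strex can cover the operand, 32 bits on M-profile cores and 64 bits elsewhere. A compilable sketch of the predicate; parameter names are illustrative, not the TargetLowering API:

    enum class AtomicExpansionKind { None, LLSC };

    // Mirrors the -O0 check, the baseline-ops check, and the new size gate.
    static AtomicExpansionKind
    shouldExpandCmpXchg(unsigned SizeInBits, bool IsMClass,
                        bool HasAtomicCmpXchg, unsigned OptLevel) {
      unsigned MaxLLSCBits = IsMClass ? 32u : 64u;
      if (OptLevel != 0 && HasAtomicCmpXchg && SizeInBits <= MaxLLSCBits)
        return AtomicExpansionKind::LLSC;
      return AtomicExpansionKind::None;
    }

    int main() {
      // A 64-bit cmpxchg on an M-profile core must not use LL/SC.
      return shouldExpandCmpXchg(64, /*IsMClass=*/true, true, 2) ==
                     AtomicExpansionKind::None
                 ? 0
                 : 1;
    }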
TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // At -O0, fast-regalloc cannot cope with the live vregs necessary to @@ -18668,9 +18670,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. + unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits(); bool HasAtomicCmpXchg = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); - if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg) + if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg && + Size <= (Subtarget->isMClass() ? 32U : 64U)) return AtomicExpansionKind::LLSC; return AtomicExpansionKind::None; } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9215c17cb94b..929a72ac687e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8604,16 +8604,20 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If it is a splat of a double, check if we can shrink it to a 32 bit // non-denormal float which when converted back to double gives us the same - // double. This is to exploit the XXSPLTIDP instruction.+ // If we lose precision, we use XXSPLTI32DX. + // double. This is to exploit the XXSPLTIDP instruction. + // If we lose precision, we use XXSPLTI32DX. if (BVNIsConstantSplat && (SplatBitSize == 64) && Subtarget.hasPrefixInstrs()) { - if (convertToNonDenormSingle(APSplatBits) && - (Op->getValueType(0) == MVT::v2f64)) { + // Check the type first to short-circuit so we don't modify APSplatBits if + // this block isn't executed. + if ((Op->getValueType(0) == MVT::v2f64) && + convertToNonDenormSingle(APSplatBits)) { SDValue SplatNode = DAG.getNode( PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); return DAG.getBitcast(Op.getValueType(), SplatNode); - } else { // We may lose precision, so we have to use XXSPLTI32DX. + } else { + // We may lose precision, so we have to use XXSPLTI32DX. uint32_t Hi = (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 477105bd03ac..0dda2c181572 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -987,6 +987,9 @@ namespace llvm { shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; + // Keep the zero-extensions for arguments to libcalls. + bool shouldKeepZExtForFP16Conv() const override { return true; } + /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. 
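On the PPCISelLowering.cpp hunk above, the reordering is behavioral, not cosmetic: convertToNonDenormSingle() modifies APSplatBits in place, so the side-effect-free type check has to run first and short-circuit. A toy demonstration of the hazard, deliberately unrelated to the PPC code itself:

    #include <cassert>

    // Stand-in for a check that mutates its argument, like
    // convertToNonDenormSingle(APSplatBits).
    static bool mutatingCheck(int &X) {
      X /= 2;
      return X != 0;
    }

    int main() {
      int A = 8;
      bool TypeOK = false; // corresponds to VT != MVT::v2f64
      if (TypeOK && mutatingCheck(A)) {
        // not taken
      }
      // Short-circuiting kept A intact for the fallback (XXSPLTI32DX) path.
      assert(A == 8);
    }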
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index e7e590153605..dcf7525d7458 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2126,7 +2126,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() { if (getFeatureBits(RISCV::FeatureStdExtB)) formalArchStr = (Twine(formalArchStr) + "_b0p93").str(); if (getFeatureBits(RISCV::FeatureStdExtV)) - formalArchStr = (Twine(formalArchStr) + "_v1p0").str(); + formalArchStr = (Twine(formalArchStr) + "_v0p10").str(); if (getFeatureBits(RISCV::FeatureExtZfh)) formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str(); if (getFeatureBits(RISCV::FeatureExtZba)) @@ -2152,9 +2152,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() { if (getFeatureBits(RISCV::FeatureExtZbt)) formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str(); if (getFeatureBits(RISCV::FeatureExtZvamo)) - formalArchStr = (Twine(formalArchStr) + "_zvamo1p0").str(); + formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str(); if (getFeatureBits(RISCV::FeatureStdExtZvlsseg)) - formalArchStr = (Twine(formalArchStr) + "_zvlsseg1p0").str(); + formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str(); getTargetStreamer().emitTextAttribute(Tag, formalArchStr); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 72434a15bedb..13c4b84aa300 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -63,7 +63,7 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { if (STI.hasFeature(RISCV::FeatureStdExtB)) Arch += "_b0p93"; if (STI.hasFeature(RISCV::FeatureStdExtV)) - Arch += "_v1p0"; + Arch += "_v0p10"; if (STI.hasFeature(RISCV::FeatureExtZfh)) Arch += "_zfh0p1"; if (STI.hasFeature(RISCV::FeatureExtZba)) @@ -89,9 +89,9 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { if (STI.hasFeature(RISCV::FeatureExtZbt)) Arch += "_zbt0p93"; if (STI.hasFeature(RISCV::FeatureExtZvamo)) - Arch += "_zvamo1p0"; + Arch += "_zvamo0p10"; if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg)) - Arch += "_zvlsseg1p0"; + Arch += "_zvlsseg0p10"; emitTextAttribute(RISCVAttrs::ARCH, Arch); } diff --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp index 6a12f99b8903..ae32cbd1ae59 100644 --- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp @@ -59,7 +59,8 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) { for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) { MachineInstr &MI = *MII++; - if (MI.getOpcode() != RISCV::PseudoVSETVLI) { + if (MI.getOpcode() != RISCV::PseudoVSETVLI && + MI.getOpcode() != RISCV::PseudoVSETIVLI) { if (PrevVSETVLI && (MI.isCall() || MI.modifiesRegister(RISCV::VL) || MI.modifiesRegister(RISCV::VTYPE))) { @@ -69,26 +70,48 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) { continue; } - // If we don't have a previous VSETVLI or the VL output isn't dead, we + // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we // can't remove this VSETVLI. 
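The v1p0 -> v0p10 edits in the two RISC-V attribute emitters above are a version-encoding fix: arch strings spell an extension version as <major>p<minor>, and the draft V spec targeted by this release is 0.10, so the suffix is "0p10", not "1p0". A small sketch of the encoding; the helper name is made up:

    #include <cassert>
    #include <string>

    static std::string versionSuffix(unsigned Major, unsigned Minor) {
      return std::to_string(Major) + "p" + std::to_string(Minor);
    }

    int main() {
      assert("_v" + versionSuffix(0, 10) == "_v0p10");
      assert("_zfh" + versionSuffix(0, 1) == "_zfh0p1");
    }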
if (!PrevVSETVLI || !MI.getOperand(0).isDead()) { PrevVSETVLI = &MI; continue; } - Register PrevAVLReg = PrevVSETVLI->getOperand(1).getReg(); - Register AVLReg = MI.getOperand(1).getReg(); + // If a previous "set vl" instruction opcode is different from this one, we + // can't differentiate the AVL values. + if (PrevVSETVLI->getOpcode() != MI.getOpcode()) { + PrevVSETVLI = &MI; + continue; + } + + // The remaining two cases are + // 1. PrevVSETVLI = PseudoVSETVLI + // MI = PseudoVSETVLI + // + // 2. PrevVSETVLI = PseudoVSETIVLI + // MI = PseudoVSETIVLI + Register AVLReg; + bool SameAVL = false; + if (MI.getOpcode() == RISCV::PseudoVSETVLI) { + AVLReg = MI.getOperand(1).getReg(); + SameAVL = PrevVSETVLI->getOperand(1).getReg() == AVLReg; + } else { // RISCV::PseudoVSETIVLI + SameAVL = + PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm(); + } int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm(); int64_t VTYPEImm = MI.getOperand(2).getImm(); - // Does this VSETVLI use the same AVL register and VTYPE immediate? - if (PrevAVLReg != AVLReg || PrevVTYPEImm != VTYPEImm) { + // Does this VSET{I}VLI use the same AVL register/value and VTYPE immediate? + if (!SameAVL || PrevVTYPEImm != VTYPEImm) { PrevVSETVLI = &MI; continue; } // If the AVLReg is X0 we need to look at the output VL of both VSETVLIs. - if (AVLReg == RISCV::X0) { + if ((MI.getOpcode() == RISCV::PseudoVSETVLI) && (AVLReg == RISCV::X0)) { + assert((PrevVSETVLI->getOpcode() == RISCV::PseudoVSETVLI) && + "Unexpected vsetvli opcode."); Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg(); Register OutVL = MI.getOperand(0).getReg(); // We can't remove if the previous VSETVLI left VL unchanged and the diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 5f50892ca886..ec9a39569952 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -103,6 +103,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoLA_TLS_GD: return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI); case RISCV::PseudoVSETVLI: + case RISCV::PseudoVSETIVLI: return expandVSetVL(MBB, MBBI); case RISCV::PseudoVMCLR_M_B1: case RISCV::PseudoVMCLR_M_B2: @@ -217,9 +218,15 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB, DebugLoc DL = MBBI->getDebugLoc(); - assert(MBBI->getOpcode() == RISCV::PseudoVSETVLI && + assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI || + MBBI->getOpcode() == RISCV::PseudoVSETIVLI) && "Unexpected pseudo instruction"); - const MCInstrDesc &Desc = TII->get(RISCV::VSETVLI); + unsigned Opcode; + if (MBBI->getOpcode() == RISCV::PseudoVSETVLI) + Opcode = RISCV::VSETVLI; + else + Opcode = RISCV::VSETIVLI; + const MCInstrDesc &Desc = TII->get(Opcode); assert(Desc.getNumOperands() == 3 && "Unexpected instruction format"); Register DstReg = MBBI->getOperand(0).getReg(); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 7b0f38671f06..43bf16c53a62 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -569,12 +569,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { SDValue VLOperand = Node->getOperand(2); if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { - if (C->isNullValue()) { - VLOperand = SDValue( - CurDAG->getMachineNode(RISCV::ADDI, DL, XLenVT, - CurDAG->getRegister(RISCV::X0, XLenVT), - CurDAG->getTargetConstant(0, DL, XLenVT)), - 0); + uint64_t AVL = 
C->getZExtValue(); + if (isUInt<5>(AVL)) { + SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); + ReplaceNode(Node, + CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT, + MVT::Other, VLImm, VTypeIOp, + /* Chain */ Node->getOperand(0))); + return; } } @@ -824,93 +826,6 @@ bool RISCVDAGToDAGISel::MatchSRLIW(SDNode *N) const { return (Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff; } -// Check that it is a SLOI (Shift Left Ones Immediate). A PatFrag has already -// determined it has the right structure: -// -// (OR (SHL RS1, VC2), VC1) -// -// Check that VC1, the mask used to fill with ones, is compatible -// with VC2, the shamt: -// -// VC1 == maskTrailingOnes(VC2) -// -bool RISCVDAGToDAGISel::MatchSLOI(SDNode *N) const { - assert(N->getOpcode() == ISD::OR); - assert(N->getOperand(0).getOpcode() == ISD::SHL); - assert(isa<ConstantSDNode>(N->getOperand(1))); - assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1))); - - SDValue Shl = N->getOperand(0); - if (Subtarget->is64Bit()) { - uint64_t VC1 = N->getConstantOperandVal(1); - uint64_t VC2 = Shl.getConstantOperandVal(1); - return VC1 == maskTrailingOnes<uint64_t>(VC2); - } - - uint32_t VC1 = N->getConstantOperandVal(1); - uint32_t VC2 = Shl.getConstantOperandVal(1); - return VC1 == maskTrailingOnes<uint32_t>(VC2); -} - -// Check that it is a SROI (Shift Right Ones Immediate). A PatFrag has already -// determined it has the right structure: -// -// (OR (SRL RS1, VC2), VC1) -// -// Check that VC1, the mask used to fill with ones, is compatible -// with VC2, the shamt: -// -// VC1 == maskLeadingOnes(VC2) -// -bool RISCVDAGToDAGISel::MatchSROI(SDNode *N) const { - assert(N->getOpcode() == ISD::OR); - assert(N->getOperand(0).getOpcode() == ISD::SRL); - assert(isa<ConstantSDNode>(N->getOperand(1))); - assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1))); - - SDValue Srl = N->getOperand(0); - if (Subtarget->is64Bit()) { - uint64_t VC1 = N->getConstantOperandVal(1); - uint64_t VC2 = Srl.getConstantOperandVal(1); - return VC1 == maskLeadingOnes<uint64_t>(VC2); - } - - uint32_t VC1 = N->getConstantOperandVal(1); - uint32_t VC2 = Srl.getConstantOperandVal(1); - return VC1 == maskLeadingOnes<uint32_t>(VC2); -} - -// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64). A PatFrag -// has already determined it has the right structure: -// -// (OR (SRL RS1, VC2), VC1) -// -// and then we check that VC1, the mask used to fill with ones, is compatible -// with VC2, the shamt: -// -// VC2 < 32 -// VC1 == maskTrailingZeros<uint64_t>(32 - VC2) -// -bool RISCVDAGToDAGISel::MatchSROIW(SDNode *N) const { - assert(N->getOpcode() == ISD::OR); - assert(N->getOperand(0).getOpcode() == ISD::SRL); - assert(isa<ConstantSDNode>(N->getOperand(1))); - assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1))); - - // The IsRV64 predicate is checked after PatFrag predicates so we can get - // here even on RV32. - if (!Subtarget->is64Bit()) - return false; - - SDValue Srl = N->getOperand(0); - uint64_t VC1 = N->getConstantOperandVal(1); - uint64_t VC2 = Srl.getConstantOperandVal(1); - - // Immediate range should be enforced by uimm5 predicate. - assert(VC2 < 32 && "Unexpected immediate"); - return VC1 == maskTrailingZeros<uint64_t>(32 - VC2); -} - // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32 // on RV64). 
// SLLIUW is the same as SLLI except for the fact that it clears the bits @@ -946,6 +861,23 @@ bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const { return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF); } +// X0 has special meaning for vsetvl/vsetvli. +// rd | rs1 | AVL value | Effect on vl +//-------------------------------------------------------------- +// !X0 | X0 | VLMAX | Set vl to VLMAX +// X0 | X0 | Value in vl | Keep current vl, just change vtype. +bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { + // If the VL value is a constant 0, manually select it to an ADDI with 0 + // immediate to prevent the default selection path from matching it to X0. + auto *C = dyn_cast<ConstantSDNode>(N); + if (C && C->isNullValue()) + VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0); + else + VL = N; + + return true; +} + bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { if (N.getOpcode() != ISD::SPLAT_VECTOR && N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 23601c3b8f06..6099586d049d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -46,11 +46,10 @@ public: bool SelectAddrFI(SDValue Addr, SDValue &Base); bool MatchSRLIW(SDNode *N) const; - bool MatchSLOI(SDNode *N) const; - bool MatchSROI(SDNode *N) const; - bool MatchSROIW(SDNode *N) const; bool MatchSLLIUW(SDNode *N) const; + bool selectVLOp(SDValue N, SDValue &VL); + bool selectVSplat(SDValue N, SDValue &SplatVal); bool selectVSplatSimm5(SDValue N, SDValue &SplatVal); bool selectVSplatUimm5(SDValue N, SDValue &SplatVal); diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td index 147993127e78..80f46b73bfd7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td @@ -38,9 +38,11 @@ class RISCVLSUMOP<bits<5> val> { bits<5> Value = val; } def LUMOPUnitStride : RISCVLSUMOP<0b00000>; +def LUMOPUnitStrideMask : RISCVLSUMOP<0b01011>; def LUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>; def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>; def SUMOPUnitStride : RISCVLSUMOP<0b00000>; +def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>; def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>; class RISCVAMOOP<bits<5> val> { @@ -63,10 +65,23 @@ def LSWidth8 : RISCVWidth<0b0000>; def LSWidth16 : RISCVWidth<0b0101>; def LSWidth32 : RISCVWidth<0b0110>; def LSWidth64 : RISCVWidth<0b0111>; -def LSWidth128 : RISCVWidth<0b1000>; -def LSWidth256 : RISCVWidth<0b1101>; -def LSWidth512 : RISCVWidth<0b1110>; -def LSWidth1024 : RISCVWidth<0b1111>; + +class RVInstSetiVLi<dag outs, dag ins, string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { + bits<5> uimm; + bits<5> rd; + bits<10> vtypei; + + let Inst{31} = 1; + let Inst{30} = 1; + let Inst{29-20} = vtypei{9-0}; + let Inst{19-15} = uimm; + let Inst{14-12} = 0b111; + let Inst{11-7} = rd; + let Opcode = OPC_OP_V.Value; + + let Defs = [VTYPE, VL]; +} class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr> : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td index 1bc288b5177c..7888ac7bac8e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td @@ -45,25 +45,6 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{ }]; } - -// Check that it is a 
SLOI (Shift Left Ones Immediate). -def SLOIPat : PatFrag<(ops node:$A, node:$B), - (or (shl node:$A, node:$B), imm), [{ - return MatchSLOI(N); -}]>; - -// Check that it is a SROI (Shift Right Ones Immediate). -def SROIPat : PatFrag<(ops node:$A, node:$B), - (or (srl node:$A, node:$B), imm), [{ - return MatchSROI(N); -}]>; - -// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64). -def SROIWPat : PatFrag<(ops node:$A, node:$B), - (or (srl node:$A, node:$B), imm), [{ - return MatchSROIW(N); -}]>; - // Checks if this mask has a single 0 bit and cannot be used with ANDI. def BCLRMask : ImmLeaf<XLenVT, [{ if (Subtarget->is64Bit()) @@ -210,11 +191,6 @@ def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">, Sched<[]>; def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[]>; } // Predicates = [HasStdExtZba] -let Predicates = [HasStdExtZbp] in { -def SLO : ALU_rr<0b0010000, 0b001, "slo">, Sched<[]>; -def SRO : ALU_rr<0b0010000, 0b101, "sro">, Sched<[]>; -} // Predicates = [HasStdExtZbp] - let Predicates = [HasStdExtZbbOrZbp] in { def ROL : ALU_rr<0b0110000, 0b001, "rol">, Sched<[]>; def ROR : ALU_rr<0b0110000, 0b101, "ror">, Sched<[]>; @@ -238,11 +214,6 @@ def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>; def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>; } // Predicates = [HasStdExtZbp] -let Predicates = [HasStdExtZbp] in { -def SLOI : RVBShift_ri<0b00100, 0b001, OPC_OP_IMM, "sloi">, Sched<[]>; -def SROI : RVBShift_ri<0b00100, 0b101, OPC_OP_IMM, "sroi">, Sched<[]>; -} // Predicates = [HasStdExtZbp] - let Predicates = [HasStdExtZbbOrZbp] in def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[]>; @@ -369,11 +340,6 @@ def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, Sched<[]>; def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[]>; } // Predicates = [HasStdExtZbb, IsRV64] -let Predicates = [HasStdExtZbp, IsRV64] in { -def SLOW : ALUW_rr<0b0010000, 0b001, "slow">, Sched<[]>; -def SROW : ALUW_rr<0b0010000, 0b101, "srow">, Sched<[]>; -} // Predicates = [HasStdExtZbp, IsRV64] - let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">, Sched<[]>; def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, Sched<[]>; @@ -395,11 +361,6 @@ let Predicates = [HasStdExtZbp, IsRV64] in { def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>; } // Predicates = [HasStdExtZbp, IsRV64] -let Predicates = [HasStdExtZbp, IsRV64] in { -def SLOIW : RVBShiftW_ri<0b0010000, 0b001, OPC_OP_IMM_32, "sloiw">, Sched<[]>; -def SROIW : RVBShiftW_ri<0b0010000, 0b101, OPC_OP_IMM_32, "sroiw">, Sched<[]>; -} // Predicates = [HasStdExtZbp, IsRV64] - let Predicates = [HasStdExtZbbOrZbp, IsRV64] in def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[]>; @@ -673,13 +634,6 @@ def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>; def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZbbOrZbp] -let Predicates = [HasStdExtZbp] in { -def : Pat<(not (shiftop<shl> (not GPR:$rs1), GPR:$rs2)), - (SLO GPR:$rs1, GPR:$rs2)>; -def : Pat<(not (shiftop<srl> (not GPR:$rs1), GPR:$rs2)), - (SRO GPR:$rs1, GPR:$rs2)>; -} // Predicates = [HasStdExtZbp] - let Predicates = [HasStdExtZbbOrZbp] in { def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>; def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>; @@ -710,13 +664,6 @@ def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)), (BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>; } -let Predicates = [HasStdExtZbp] in { 
-def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt), - (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>; -def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt), - (SROI GPR:$rs1, uimmlog2xlen:$shamt)>; -} // Predicates = [HasStdExtZbp] - // There's no encoding for roli in the 'B' extension as it can be // implemented with rori by negating the immediate. let Predicates = [HasStdExtZbbOrZbp] in { @@ -936,13 +883,6 @@ def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 3)), GPR:$rs2), (SH3ADDUW GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtZba, IsRV64] -let Predicates = [HasStdExtZbp, IsRV64] in { -def : Pat<(not (shiftopw<riscv_sllw> (not GPR:$rs1), GPR:$rs2)), - (SLOW GPR:$rs1, GPR:$rs2)>; -def : Pat<(not (shiftopw<riscv_srlw> (not GPR:$rs1), GPR:$rs2)), - (SROW GPR:$rs1, GPR:$rs2)>; -} // Predicates = [HasStdExtZbp, IsRV64] - let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2), (ROLW GPR:$rs1, GPR:$rs2)>; @@ -983,13 +923,6 @@ def : Pat<(xor (assertsexti32 GPR:$rs1), BSETINVWMask:$mask), } // Predicates = [HasStdExtZbs, IsRV64] let Predicates = [HasStdExtZbp, IsRV64] in { -def : Pat<(sext_inreg (SLOIPat GPR:$rs1, uimm5:$shamt), i32), - (SLOIW GPR:$rs1, uimm5:$shamt)>; -def : Pat<(SROIWPat GPR:$rs1, uimm5:$shamt), - (SROIW GPR:$rs1, uimm5:$shamt)>; -} // Predicates = [HasStdExtZbp, IsRV64] - -let Predicates = [HasStdExtZbp, IsRV64] in { def : Pat<(riscv_rorw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>; def : Pat<(riscv_rolw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>; def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 4f9e9cfbdb98..86fbc73d81d5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file describes the RISC-V instructions from the standard 'V' Vector -/// extension, version 0.9. +/// extension, version 0.10. /// This version is still experimental as the 'V' extension hasn't been /// ratified yet.
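Much of the V-extension churn in the hunks that follow is mechanical fallout of this 0.9 -> 0.10 bump: the 0.10 draft no longer provides encodings for element widths above 64 bits, so the LSWidth128..LSWidth1024 defs and every *128/256/512/1024 load, store, and segment flavor are deleted. A one-line check capturing the surviving element widths (illustrative, not LLVM code):

    #include <cassert>

    static bool isValidEEW(unsigned Bits) {
      return Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64;
    }

    int main() {
      assert(isValidEEW(32));
      assert(!isValidEEW(128)); // vle128.v and friends are gone in 0.10
    }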
/// @@ -82,6 +82,12 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT, //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { +// load vd, (rs1) +class VUnitStrideLoadMask<string opcodestr> + : RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0}, + (outs VR:$vd), + (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">; + // load vd, (rs1), vm class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width, string opcodestr> @@ -138,6 +144,12 @@ class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width, let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { // store vd, vs3, (rs1), vm +class VUnitStrideStoreMask<string opcodestr> + : RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0}, + (outs), (ins VR:$vs3, GPR:$rs1), opcodestr, + "$vs3, (${rs1})">; + +// store vd, vs3, (rs1), vm class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width, string opcodestr> : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0}, @@ -423,10 +435,6 @@ multiclass VWholeLoad<bits<3> nf, string opcodestr> { def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v">; def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v">; def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v">; - def E128_V : VWholeLoad<nf, LSWidth128, opcodestr # "e128.v">; - def E256_V : VWholeLoad<nf, LSWidth256, opcodestr # "e256.v">; - def E512_V : VWholeLoad<nf, LSWidth512, opcodestr # "e512.v">; - def E1024_V : VWholeLoad<nf, LSWidth1024, opcodestr # "e1024.v">; } //===----------------------------------------------------------------------===// @@ -438,6 +446,9 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), "vsetvli", "$rd, $rs1, $vtypei">; +def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei), + "vsetivli", "$rd, $uimm, $vtypei">; + def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), "vsetvl", "$rd, $rs1, $rs2">; } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 @@ -447,47 +458,30 @@ def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">; def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">; def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">; def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">; -def VLE128_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth128, "vle128.v">; -def VLE256_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth256, "vle256.v">; -def VLE512_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth512, "vle512.v">; -def VLE1024_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth1024, "vle1024.v">; def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">; def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">; def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">; def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">; -def VLE128FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth128, "vle128ff.v">; -def VLE256FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth256, "vle256ff.v">; -def VLE512FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth512, "vle512ff.v">; -def VLE1024FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth1024, "vle1024ff.v">; + +def VLE1_V : VUnitStrideLoadMask<"vle1.v">; +def VSE1_V : VUnitStrideStoreMask<"vse1.v">; def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">; def VSE16_V : VUnitStrideStore<SUMOPUnitStride, 
LSWidth16, "vse16.v">; def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">; def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">; -def VSE128_V : VUnitStrideStore<SUMOPUnitStride, LSWidth128, "vse128.v">; -def VSE256_V : VUnitStrideStore<SUMOPUnitStride, LSWidth256, "vse256.v">; -def VSE512_V : VUnitStrideStore<SUMOPUnitStride, LSWidth512, "vse512.v">; -def VSE1024_V : VUnitStrideStore<SUMOPUnitStride, LSWidth1024, "vse1024.v">; // Vector Strided Instructions def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">; def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">; def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">; def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">; -def VLSE128_V : VStridedLoad<LSWidth128, "vlse128.v">; -def VLSE256_V : VStridedLoad<LSWidth256, "vlse256.v">; -def VLSE512_V : VStridedLoad<LSWidth512, "vlse512.v">; -def VLSE1024_V : VStridedLoad<LSWidth1024, "vlse1024.v">; def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">; def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">; def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">; def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">; -def VSSE128_V : VStridedStore<LSWidth128, "vsse128.v">; -def VSSE256_V : VStridedStore<LSWidth256, "vsse256.v">; -def VSSE512_V : VStridedStore<LSWidth512, "vsse512.v">; -def VSSE1024_V : VStridedStore<LSWidth1024, "vsse1024.v">; // Vector Indexed Instructions def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">; @@ -806,8 +800,8 @@ defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>; // Vector Floating-Point Square-Root Instruction defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; -defm VFRSQRTE7_V : VALU_FV_VS2<"vfrsqrte7.v", 0b010011, 0b00100>; -defm VFRECE7_V : VALU_FV_VS2<"vfrece7.v", 0b010011, 0b00101>; +defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; +defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; // Vector Floating-Point MIN/MAX Instructions defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>; @@ -1058,47 +1052,27 @@ let Predicates = [HasStdExtZvlsseg] in { def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">; def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">; def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">; - def VLSEG#nf#E128_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth128, "vlseg"#nf#"e128.v">; - def VLSEG#nf#E256_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth256, "vlseg"#nf#"e256.v">; - def VLSEG#nf#E512_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth512, "vlseg"#nf#"e512.v">; - def VLSEG#nf#E1024_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth1024, "vlseg"#nf#"e1024.v">; def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">; def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">; def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">; def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">; - def VLSEG#nf#E128FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth128, "vlseg"#nf#"e128ff.v">; - def VLSEG#nf#E256FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth256, "vlseg"#nf#"e256ff.v">; - def VLSEG#nf#E512FF_V : 
VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth512, "vlseg"#nf#"e512ff.v">; - def VLSEG#nf#E1024FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth1024, "vlseg"#nf#"e1024ff.v">; def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">; def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">; def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">; def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">; - def VSSEG#nf#E128_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth128, "vsseg"#nf#"e128.v">; - def VSSEG#nf#E256_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth256, "vsseg"#nf#"e256.v">; - def VSSEG#nf#E512_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth512, "vsseg"#nf#"e512.v">; - def VSSEG#nf#E1024_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth1024, "vsseg"#nf#"e1024.v">; // Vector Strided Instructions def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">; def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">; def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">; def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">; - def VLSSEG#nf#E128_V : VStridedSegmentLoad<!add(nf, -1), LSWidth128, "vlsseg"#nf#"e128.v">; - def VLSSEG#nf#E256_V : VStridedSegmentLoad<!add(nf, -1), LSWidth256, "vlsseg"#nf#"e256.v">; - def VLSSEG#nf#E512_V : VStridedSegmentLoad<!add(nf, -1), LSWidth512, "vlsseg"#nf#"e512.v">; - def VLSSEG#nf#E1024_V : VStridedSegmentLoad<!add(nf, -1), LSWidth1024, "vlsseg"#nf#"e1024.v">; def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">; def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">; def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">; def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">; - def VSSSEG#nf#E128_V : VStridedSegmentStore<!add(nf, -1), LSWidth128, "vssseg"#nf#"e128.v">; - def VSSSEG#nf#E256_V : VStridedSegmentStore<!add(nf, -1), LSWidth256, "vssseg"#nf#"e256.v">; - def VSSSEG#nf#E512_V : VStridedSegmentStore<!add(nf, -1), LSWidth512, "vssseg"#nf#"e512.v">; - def VSSSEG#nf#E1024_V : VStridedSegmentStore<!add(nf, -1), LSWidth1024, "vssseg"#nf#"e1024.v">; // Vector Indexed Instructions def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, @@ -1109,14 +1083,6 @@ let Predicates = [HasStdExtZvlsseg] in { LSWidth32, "vluxseg"#nf#"ei32.v">; def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64, "vluxseg"#nf#"ei64.v">; - def VLUXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, - LSWidth128, "vluxseg"#nf#"ei128.v">; - def VLUXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, - LSWidth256, "vluxseg"#nf#"ei256.v">; - def VLUXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, - LSWidth512, "vluxseg"#nf#"ei512.v">; - def VLUXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, - LSWidth1024, "vluxseg"#nf#"ei1024.v">; def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth8, "vloxseg"#nf#"ei8.v">; @@ -1126,14 +1092,6 @@ let Predicates = [HasStdExtZvlsseg] in { LSWidth32, "vloxseg"#nf#"ei32.v">; def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, 
LSWidth64, "vloxseg"#nf#"ei64.v">; - def VLOXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, - LSWidth128, "vloxseg"#nf#"ei128.v">; - def VLOXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, - LSWidth256, "vloxseg"#nf#"ei256.v">; - def VLOXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, - LSWidth512, "vloxseg"#nf#"ei512.v">; - def VLOXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, - LSWidth1024, "vloxseg"#nf#"ei1024.v">; def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth8, "vsuxseg"#nf#"ei8.v">; @@ -1143,14 +1101,6 @@ let Predicates = [HasStdExtZvlsseg] in { LSWidth32, "vsuxseg"#nf#"ei32.v">; def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64, "vsuxseg"#nf#"ei64.v">; - def VSUXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, - LSWidth128, "vsuxseg"#nf#"ei128.v">; - def VSUXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, - LSWidth256, "vsuxseg"#nf#"ei256.v">; - def VSUXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, - LSWidth512, "vsuxseg"#nf#"ei512.v">; - def VSUXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, - LSWidth1024, "vsuxseg"#nf#"ei1024.v">; def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth8, "vsoxseg"#nf#"ei8.v">; @@ -1160,14 +1110,6 @@ let Predicates = [HasStdExtZvlsseg] in { LSWidth32, "vsoxseg"#nf#"ei32.v">; def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64, "vsoxseg"#nf#"ei64.v">; - def VSOXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, - LSWidth128, "vsoxseg"#nf#"ei128.v">; - def VSOXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, - LSWidth256, "vsoxseg"#nf#"ei256.v">; - def VSOXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, - LSWidth512, "vsoxseg"#nf#"ei512.v">; - def VSOXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, - LSWidth1024, "vsoxseg"#nf#"ei1024.v">; } } // Predicates = [HasStdExtZvlsseg] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 06e4d053d5d7..60bd1b24cab8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file contains the required infrastructure to support code generation -/// for the standard 'V' (Vector) extension, version 0.9. This version is still +/// for the standard 'V' (Vector) extension, version 0.10. This version is still /// experimental as the 'V' extension hasn't been ratified yet. /// /// This file is included from RISCVInstrInfoV.td @@ -42,17 +42,7 @@ def riscv_read_vl : SDNode<"RISCVISD::READ_VL", //-------------------------------------------------------------- // !X0 | X0 | VLMAX | Set vl to VLMAX // X0 | X0 | Value in vl | Keep current vl, just change vtype. 
-def NoX0 : SDNodeXForm<undef, -[{ - auto *C = dyn_cast<ConstantSDNode>(N); - if (C && C->isNullValue()) { - SDLoc DL(N); - return SDValue(CurDAG->getMachineNode(RISCV::ADDI, DL, Subtarget->getXLenVT(), - CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), - CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT())), 0); - } - return SDValue(N, 0); -}]>; +def VLOp : ComplexPattern<XLenVT, 1, "selectVLOp">; def DecImm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue() - 1, SDLoc(N), @@ -1228,6 +1218,14 @@ multiclass VPseudoUSLoad { } } +multiclass VPseudoLoadMask { + foreach mti = AllMasks in { + let VLMul = mti.LMul.value in { + def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR>; + } + } +} + multiclass VPseudoSLoad { foreach lmul = MxList.m in { defvar LInfo = lmul.MX; @@ -1264,6 +1262,14 @@ multiclass VPseudoUSStore { } } +multiclass VPseudoStoreMask { + foreach mti = AllMasks in { + let VLMul = mti.LMul.value in { + def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR>; + } + } +} + multiclass VPseudoSStore { foreach lmul = MxList.m in { defvar LInfo = lmul.MX; @@ -1951,10 +1957,10 @@ class VPatUnaryNoMask<string intrinsic_name, VReg op2_reg_class> : Pat<(result_type (!cast<Intrinsic>(intrinsic_name) (op2_type op2_reg_class:$rs2), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) (op2_type op2_reg_class:$rs2), - (NoX0 GPR:$vl), sew)>; + GPR:$vl, sew)>; class VPatUnaryMask<string intrinsic_name, string inst, @@ -1970,21 +1976,21 @@ class VPatUnaryMask<string intrinsic_name, (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), (mask_type V0), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK") (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), - (mask_type V0), (NoX0 GPR:$vl), sew)>; + (mask_type V0), GPR:$vl, sew)>; class VPatMaskUnaryNoMask<string intrinsic_name, string inst, MTypeInfo mti> : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name) (mti.Mask VR:$rs2), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_M_"#mti.BX) (mti.Mask VR:$rs2), - (NoX0 GPR:$vl), mti.SEW)>; + GPR:$vl, mti.SEW)>; class VPatMaskUnaryMask<string intrinsic_name, string inst, @@ -1993,11 +1999,11 @@ class VPatMaskUnaryMask<string intrinsic_name, (mti.Mask VR:$merge), (mti.Mask VR:$rs2), (mti.Mask V0), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") (mti.Mask VR:$merge), (mti.Mask VR:$rs2), - (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>; + (mti.Mask V0), GPR:$vl, mti.SEW)>; class VPatUnaryAnyMask<string intrinsic, string inst, @@ -2013,12 +2019,12 @@ class VPatUnaryAnyMask<string intrinsic, (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (mask_type VR:$rs2), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (mask_type VR:$rs2), - (NoX0 GPR:$vl), sew)>; + GPR:$vl, sew)>; class VPatBinaryNoMask<string intrinsic_name, string inst, @@ -2031,11 +2037,11 @@ class VPatBinaryNoMask<string intrinsic_name, Pat<(result_type (!cast<Intrinsic>(intrinsic_name) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (NoX0 GPR:$vl), sew)>; + GPR:$vl, sew)>; class VPatBinaryMask<string intrinsic_name, string inst, @@ -2052,12 +2058,12 @@ 
class VPatBinaryMask<string intrinsic_name, (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), (mask_type V0), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_MASK") (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), (NoX0 GPR:$vl), sew)>; + (mask_type V0), GPR:$vl, sew)>; class VPatTernaryNoMask<string intrinsic, string inst, @@ -2075,12 +2081,12 @@ class VPatTernaryNoMask<string intrinsic, (result_type result_reg_class:$rs3), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (NoX0 GPR:$vl), sew)>; + GPR:$vl, sew)>; class VPatTernaryMask<string intrinsic, string inst, @@ -2099,13 +2105,13 @@ class VPatTernaryMask<string intrinsic, (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), (mask_type V0), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, (mask_type V0), - (NoX0 GPR:$vl), sew)>; + GPR:$vl, sew)>; class VPatAMOWDNoMask<string intrinsic_name, string inst, @@ -2119,10 +2125,10 @@ class VPatAMOWDNoMask<string intrinsic_name, GPR:$rs1, (op1_type op1_reg_class:$vs2), (result_type vlmul.vrclass:$vd), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX) $rs1, $vs2, $vd, - (NoX0 GPR:$vl), sew)>; + GPR:$vl, sew)>; class VPatAMOWDMask<string intrinsic_name, string inst, @@ -2138,10 +2144,10 @@ class VPatAMOWDMask<string intrinsic_name, (op1_type op1_reg_class:$vs2), (result_type vlmul.vrclass:$vd), (mask_type V0), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK") $rs1, $vs2, $vd, - (mask_type V0), (NoX0 GPR:$vl), sew)>; + (mask_type V0), GPR:$vl, sew)>; multiclass VPatUSLoad<string intrinsic, string inst, @@ -2153,14 +2159,14 @@ multiclass VPatUSLoad<string intrinsic, { defvar Intr = !cast<Intrinsic>(intrinsic); defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); - def : Pat<(type (Intr GPR:$rs1, GPR:$vl)), - (Pseudo $rs1, (NoX0 GPR:$vl), sew)>; + def : Pat<(type (Intr GPR:$rs1, (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, GPR:$vl, sew)>; defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge), - GPR:$rs1, (mask_type V0), GPR:$vl)), + GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))), (PseudoMask $merge, - $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>; + $rs1, (mask_type V0), GPR:$vl, sew)>; } multiclass VPatUSLoadFF<string inst, @@ -2171,13 +2177,13 @@ multiclass VPatUSLoadFF<string inst, VReg reg_class> { defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); - def : Pat<(type (riscv_vleff GPR:$rs1, GPR:$vl)), - (Pseudo $rs1, (NoX0 GPR:$vl), sew)>; + def : Pat<(type (riscv_vleff GPR:$rs1, (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, GPR:$vl, sew)>; defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge), - GPR:$rs1, (mask_type V0), GPR:$vl)), + GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))), (PseudoMask $merge, - $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>; + $rs1, (mask_type V0), GPR:$vl, sew)>; } multiclass VPatSLoad<string 
intrinsic, @@ -2190,14 +2196,14 @@ multiclass VPatSLoad<string intrinsic, { defvar Intr = !cast<Intrinsic>(intrinsic); defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); - def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, GPR:$vl)), - (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>; + def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, $rs2, GPR:$vl, sew)>; defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge), - GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl)), + GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl)))), (PseudoMask $merge, - $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>; + $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; } multiclass VPatILoad<string intrinsic, @@ -2213,16 +2219,16 @@ multiclass VPatILoad<string intrinsic, { defvar Intr = !cast<Intrinsic>(intrinsic); defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX); - def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), GPR:$vl)), - (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>; + def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl)))), + (Pseudo $rs1, $rs2, GPR:$vl, sew)>; defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK"); def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge), GPR:$rs1, (idx_type idx_reg_class:$rs2), - (mask_type V0), GPR:$vl)), + (mask_type V0), (XLenVT (VLOp GPR:$vl)))), (PseudoMask $merge, - $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>; + $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; } multiclass VPatUSStore<string intrinsic, @@ -2235,12 +2241,12 @@ multiclass VPatUSStore<string intrinsic, { defvar Intr = !cast<Intrinsic>(intrinsic); defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); - def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$vl), - (Pseudo $rs3, $rs1, (NoX0 GPR:$vl), sew)>; + def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))), + (Pseudo $rs3, $rs1, GPR:$vl, sew)>; defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); - def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), GPR:$vl), - (PseudoMask $rs3, $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>; + def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl))), + (PseudoMask $rs3, $rs1, (mask_type V0), GPR:$vl, sew)>; } multiclass VPatSStore<string intrinsic, @@ -2253,12 +2259,12 @@ multiclass VPatSStore<string intrinsic, { defvar Intr = !cast<Intrinsic>(intrinsic); defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX); - def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, GPR:$vl), - (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>; + def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl))), + (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>; defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK"); - def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl), - (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>; + def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl))), + (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; } multiclass VPatIStore<string intrinsic, @@ 
-2275,13 +2281,13 @@ multiclass VPatIStore<string intrinsic, defvar Intr = !cast<Intrinsic>(intrinsic); defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX); def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, - (idx_type idx_reg_class:$rs2), GPR:$vl), - (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>; + (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl))), + (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>; defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask"); defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK"); def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, - (idx_type idx_reg_class:$rs2), (mask_type V0), GPR:$vl), - (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>; + (idx_type idx_reg_class:$rs2), (mask_type V0), (XLenVT (VLOp GPR:$vl))), + (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>; } multiclass VPatUnaryS_M<string intrinsic_name, @@ -2289,13 +2295,13 @@ multiclass VPatUnaryS_M<string intrinsic_name, { foreach mti = AllMasks in { def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name) - (mti.Mask VR:$rs1), GPR:$vl)), + (mti.Mask VR:$rs1), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_M_"#mti.BX) $rs1, - (NoX0 GPR:$vl), mti.SEW)>; + GPR:$vl, mti.SEW)>; def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name # "_mask") - (mti.Mask VR:$rs1), (mti.Mask V0), GPR:$vl)), + (mti.Mask VR:$rs1), (mti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") $rs1, - (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>; + (mti.Mask V0), GPR:$vl, mti.SEW)>; } } @@ -2360,24 +2366,24 @@ multiclass VPatNullaryV<string intrinsic, string instruction> { foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic) - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX) - (NoX0 GPR:$vl), vti.SEW)>; + GPR:$vl, vti.SEW)>; def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask") (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), (XLenVT GPR:$vl))), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK") vti.RegClass:$merge, (vti.Mask V0), - (NoX0 GPR:$vl), vti.SEW)>; + GPR:$vl, vti.SEW)>; } } multiclass VPatNullaryM<string intrinsic, string inst> { foreach mti = AllMasks in def : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic) - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_M_"#mti.BX) - (NoX0 GPR:$vl), mti.SEW)>; + GPR:$vl, mti.SEW)>; } multiclass VPatBinary<string intrinsic, @@ -2414,11 +2420,11 @@ multiclass VPatBinaryCarryIn<string intrinsic, (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), (mask_type V0), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), (NoX0 GPR:$vl), sew)>; + (mask_type V0), GPR:$vl, sew)>; } multiclass VPatBinaryMaskOut<string intrinsic, @@ -2435,11 +2441,11 @@ multiclass VPatBinaryMaskOut<string intrinsic, def : Pat<(result_type (!cast<Intrinsic>(intrinsic) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (XLenVT GPR:$vl))), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (NoX0 GPR:$vl), sew)>; + GPR:$vl, sew)>; } multiclass VPatConversion<string intrinsic, @@ -3125,7 +3131,7 @@ def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins), // Pseudos. 
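// Note: the recurring change in the load/store and ternary patterns above swaps the
// output-operand transform (NoX0 GPR:$vl) for an input-side pattern (XLenVT (VLOp GPR:$vl)),
// so special handling of the VL operand happens while the DAG is matched rather than when
// the result is emitted. A minimal C++ sketch of the idea, assuming a ComplexPattern
// selector hook named selectVLOp (the name and signature here are illustrative, not
// necessarily the actual API in this revision):
//
//   // Accept any VL operand; a hook like this can normalize special cases
//   // (e.g. a sentinel meaning "use maximum VL") during pattern matching,
//   // which lets the output patterns drop the NoX0 transform entirely.
//   bool selectVLOp(SDValue N, SDValue &VL) {
//     VL = N;        // pass the VL virtual register through unchanged
//     return true;   // always matches
//   }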
let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in { def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>; - +def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>; } //===----------------------------------------------------------------------===// @@ -3142,6 +3148,9 @@ foreach eew = EEWList in { defm PseudoVSE # eew : VPseudoUSStore; } +defm PseudoVLE1 : VPseudoLoadMask; +defm PseudoVSE1 : VPseudoStoreMask; + //===----------------------------------------------------------------------===// // 7.5 Vector Strided Instructions //===----------------------------------------------------------------------===// @@ -3437,12 +3446,12 @@ defm PseudoVFSQRT : VPseudoUnaryV_V; //===----------------------------------------------------------------------===// // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction //===----------------------------------------------------------------------===// -defm PseudoVFRSQRTE7 : VPseudoUnaryV_V; +defm PseudoVFRSQRT7 : VPseudoUnaryV_V; //===----------------------------------------------------------------------===// // 14.10. Vector Floating-Point Reciprocal Estimate Instruction //===----------------------------------------------------------------------===// -defm PseudoVFRECE7 : VPseudoUnaryV_V; +defm PseudoVFREC7 : VPseudoUnaryV_V; //===----------------------------------------------------------------------===// // 14.11. Vector Floating-Point Min/Max Instructions @@ -3719,6 +3728,15 @@ foreach vti = AllVectors in vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>; } +foreach vti = AllMasks in { + defvar PseudoVLE1 = !cast<Instruction>("PseudoVLE1_V_"#vti.BX); + def : Pat<(vti.Mask (int_riscv_vle1 GPR:$rs1, (XLenVT (VLOp GPR:$vl)))), + (PseudoVLE1 $rs1, GPR:$vl, vti.SEW)>; + defvar PseudoVSE1 = !cast<Instruction>("PseudoVSE1_V_"#vti.BX); + def : Pat<(int_riscv_vse1 (vti.Mask VR:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))), + (PseudoVSE1 $rs3, $rs1, GPR:$vl, vti.SEW)>; +} + //===----------------------------------------------------------------------===// // 7.5 Vector Strided Instructions //===----------------------------------------------------------------------===// @@ -3886,62 +3904,63 @@ defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors> // instruction. 
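// Note: the patterns below rewrite vmslt(u).vx with an immediate into vmsle(u).vi with
// the immediate decremented (DecImm), using the identity x < imm  <=>  x <= imm - 1.
// For unsigned compares the identity breaks at imm == 0: x <u 0 is always false, while
// x <=u (0 - 1) == UINT_MAX is always true, hence the vmsne.vv special case further down
// (vmsne.vv vs1, vs1 is always false). A minimal sketch of the scalar identity
// (illustrative only):
//
//   // Safe unsigned rewrite: guard the imm == 0 corner before decrementing.
//   bool ultViaULE(unsigned x, unsigned imm) {
//     return imm != 0 && x <= imm - 1;  // imm == 0: "x < 0" is always false
//   }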
foreach vti = AllIntegerVectors in { def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), GPR:$vl)), + (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), - (NoX0 GPR:$vl), + GPR:$vl, vti.SEW)>; def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), (vti.Mask VR:$merge), - GPR:$vl)), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), (vti.Mask V0), - (NoX0 GPR:$vl), + GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), - (vti.Scalar simm5_plus1:$rs2), GPR:$vl)), + def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), + (vti.Scalar simm5_plus1:$rs2), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), - (NoX0 GPR:$vl), + GPR:$vl, vti.SEW)>; def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), (vti.Mask VR:$merge), - GPR:$vl)), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), (vti.Mask V0), - (NoX0 GPR:$vl), + GPR:$vl, vti.SEW)>; // Special cases to avoid matching vmsltu.vi 0 (always false) to // vmsleu.vi -1 (always true). Instead match to vmsne.vv. def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), GPR:$vl)), + (vti.Scalar 0), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1, vti.RegClass:$rs1, - (NoX0 GPR:$vl), + GPR:$vl, vti.SEW)>; def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), (vti.Vector vti.RegClass:$rs1), (vti.Scalar 0), (vti.Mask VR:$merge), - GPR:$vl)), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, vti.RegClass:$rs1, (vti.Mask V0), - (NoX0 GPR:$vl), + GPR:$vl, vti.SEW)>; } @@ -4002,18 +4021,18 @@ defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">; //===----------------------------------------------------------------------===// foreach vti = AllVectors in { def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1), - GPR:$vl)), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX) - $rs1, (NoX0 GPR:$vl), vti.SEW)>; + $rs1, GPR:$vl, vti.SEW)>; } foreach vti = AllIntegerVectors in { - def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, GPR:$vl)), + def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX) - $rs2, (NoX0 GPR:$vl), vti.SEW)>; - def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, GPR:$vl)), + $rs2, GPR:$vl, vti.SEW)>; + def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX) - simm5:$imm5, (NoX0 GPR:$vl), vti.SEW)>; + simm5:$imm5, GPR:$vl, vti.SEW)>; } //===----------------------------------------------------------------------===// @@ -4109,12 +4128,12 @@ defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>; //===----------------------------------------------------------------------===// // 14.9. 
Vector Floating-Point Reciprocal Square-Root Estimate Instruction //===----------------------------------------------------------------------===// -defm "" : VPatUnaryV_V<"int_riscv_vfrsqrte7", "PseudoVFRSQRTE7", AllFloatVectors>; +defm "" : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>; //===----------------------------------------------------------------------===// // 14.10. Vector Floating-Point Reciprocal Estimate Instruction //===----------------------------------------------------------------------===// -defm "" : VPatUnaryV_V<"int_riscv_vfrece7", "PseudoVFRECE7", AllFloatVectors>; +defm "" : VPatUnaryV_V<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>; //===----------------------------------------------------------------------===// // 14.11. Vector Floating-Point Min/Max Instructions @@ -4157,8 +4176,8 @@ foreach fvti = AllFloatVectors in { defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX); def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2), (fvti.Scalar (fpimm0)), - (fvti.Mask V0), (XLenVT GPR:$vl))), - (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), (NoX0 GPR:$vl), fvti.SEW)>; + (fvti.Mask V0), (XLenVT (VLOp GPR:$vl)))), + (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.SEW)>; } //===----------------------------------------------------------------------===// @@ -4167,16 +4186,16 @@ foreach fvti = AllFloatVectors in { foreach fvti = AllFloatVectors in { // If we're splatting fpimm0, use vmv.v.x vd, x0. def : Pat<(fvti.Vector (int_riscv_vfmv_v_f - (fvti.Scalar (fpimm0)), GPR:$vl)), + (fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX) - 0, (NoX0 GPR:$vl), fvti.SEW)>; + 0, GPR:$vl, fvti.SEW)>; def : Pat<(fvti.Vector (int_riscv_vfmv_v_f - (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)), + (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) (fvti.Scalar fvti.ScalarRegClass:$rs2), - (NoX0 GPR:$vl), fvti.SEW)>; + GPR:$vl, fvti.SEW)>; } //===----------------------------------------------------------------------===// @@ -4321,9 +4340,9 @@ foreach vti = AllIntegerVectors in { def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)), (!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.SEW)>; def : Pat<(vti.Vector (int_riscv_vmv_s_x (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, GPR:$vl)), + GPR:$rs2, (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMV_S_X_" # vti.LMul.MX) - (vti.Vector $rs1), $rs2, (NoX0 GPR:$vl), vti.SEW)>; + (vti.Vector $rs1), $rs2, GPR:$vl, vti.SEW)>; } } // Predicates = [HasStdExtV] @@ -4339,12 +4358,12 @@ foreach fvti = AllFloatVectors in { (instr $rs2, fvti.SEW)>; def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1), - (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)), + (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" # fvti.LMul.MX) (fvti.Vector $rs1), (fvti.Scalar fvti.ScalarRegClass:$rs2), - (NoX0 GPR:$vl), fvti.SEW)>; + GPR:$vl, fvti.SEW)>; } } // Predicates = [HasStdExtV, HasStdExtF] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index aea3d0e17ccc..dee67708bed1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -8,7 +8,7 @@ /// /// This file contains the required infrastructure and SDNode 
patterns to /// support code generation for the standard 'V' (Vector) extension, version -/// 0.9. This version is still experimental as the 'V' extension hasn't been +/// 0.10. This version is still experimental as the 'V' extension hasn't been /// ratified yet. /// /// This file is included from and depends upon RISCVInstrInfoVPseudos.td @@ -384,8 +384,8 @@ defm "" : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH">; defm "" : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU">; // 12.11. Vector Integer Divide Instructions -defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIVU">; -defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIV">; +defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU">; +defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">; defm "" : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">; defm "" : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">; diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h index a404f7ced70a..8c1fa840f19c 100644 --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -334,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) { return true; } // (m)1 patterns - return (Val & (1UL << 63)) && isShiftedMask_64(Val); + return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val); } inline static bool isMImm32Val(uint32_t Val) { @@ -347,14 +347,14 @@ inline static bool isMImm32Val(uint32_t Val) { return true; } // (m)1 patterns - return (Val & (1 << 31)) && isShiftedMask_32(Val); + return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val); } /// val2MImm - Convert an integer immediate value to target MImm immediate. inline static uint64_t val2MImm(uint64_t Val) { if (Val == 0) return 0; // (0)1 - if (Val & (1UL << 63)) + if (Val & (UINT64_C(1) << 63)) return countLeadingOnes(Val); // (m)1 return countLeadingZeros(Val) | 0x40; // (m)0 } @@ -364,8 +364,8 @@ inline static uint64_t mimm2Val(uint64_t Val) { if (Val == 0) return 0; // (0)1 if ((Val & 0x40) == 0) - return (uint64_t)((1L << 63) >> (Val & 0x3f)); // (m)1 - return ((uint64_t)(-1L) >> (Val & 0x3f)); // (m)0 + return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1 + return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0 } inline unsigned M0(unsigned Val) { return Val + 64; } diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 05e482a6b66e..4e6d8e8e1a54 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) { insn->addressSize = (insn->hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; + insn->hasOpSize = false; } else { insn->registerSize = (insn->hasOpSize ? 2 : 4); insn->addressSize = (insn->hasAdSize ? 4 : 8); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0dd20235aa3c..6b816c710f98 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36916,11 +36916,18 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V, Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res)); return DAG.getBitcast(VT, Res); } + case X86ISD::VPERMILPI: + // TODO: Handle v4f64 permutes with different low/high lane masks. 
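// Note: for the X86 change below, VPERMILPI on v4f64 takes a 2-bit in-lane selector per
// 128-bit lane (imm bits [1:0] steer the low lane, bits [3:2] the high lane). Folding it
// through a lane shuffle is only safe when both lanes apply the same in-lane permute,
// which is exactly what the added mask test checks. A small illustrative helper:
//
//   // True when both 128-bit lanes of a v4f64 VPERMILPI mask permute identically,
//   // e.g. 0b0101 (uniform) vs. 0b0110 (diverging low/high lanes).
//   bool hasUniformLaneMask(uint64_t Mask) {
//     return (Mask & 0x3) == ((Mask >> 2) & 0x3);
//   }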
+ if (SrcVT0 == MVT::v4f64) { + uint64_t Mask = Src0.getConstantOperandVal(1); + if ((Mask & 0x3) != ((Mask >> 2) & 0x3)) + break; + } + LLVM_FALLTHROUGH; case X86ISD::VSHLI: case X86ISD::VSRLI: case X86ISD::VSRAI: case X86ISD::PSHUFD: - case X86ISD::VPERMILPI: if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) { SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0)); SDValue RHS = diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0c2b278fdd7b..19012797ae9a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, EXTRACT_get_vextract256_imm, [HasAVX512]>; // vextractps - extract 32 bits from XMM -def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), +def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, + [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX, VEX_WIG, Sched<[WriteVecExtract]>; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 37fc27e91100..41d7f363e1a4 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -30,7 +30,7 @@ namespace llvm { ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, StringRef CalleeName) { if (CalleeName.empty()) - return getChildContext(CallSite); + return getHottestChildContext(CallSite); uint32_t Hash = nodeHash(CalleeName, CallSite); auto It = AllChildContext.find(Hash); @@ -40,18 +40,22 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, } ContextTrieNode * -ContextTrieNode::getChildContext(const LineLocation &CallSite) { +ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) { // CSFDO-TODO: This could be slow, change AllChildContext so we can // do point look up for child node by call site alone. 
- // CSFDO-TODO: Return the child with max count for indirect call + // Retrieve the child node with max count for indirect call ContextTrieNode *ChildNodeRet = nullptr; + uint64_t MaxCalleeSamples = 0; for (auto &It : AllChildContext) { ContextTrieNode &ChildNode = It.second; - if (ChildNode.CallSiteLoc == CallSite) { - if (ChildNodeRet) - return nullptr; - else - ChildNodeRet = &ChildNode; + if (ChildNode.CallSiteLoc != CallSite) + continue; + FunctionSamples *Samples = ChildNode.getFunctionSamples(); + if (!Samples) + continue; + if (Samples->getTotalSamples() > MaxCalleeSamples) { + ChildNodeRet = &ChildNode; + MaxCalleeSamples = Samples->getTotalSamples(); } } @@ -179,7 +183,7 @@ SampleContextTracker::SampleContextTracker( SampleContext Context(FuncSample.first(), RawContext); LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n"); if (!Context.isBaseContext()) - FuncToCtxtProfileSet[Context.getName()].insert(FSamples); + FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); @@ -191,12 +195,12 @@ FunctionSamples * SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, StringRef CalleeName) { LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n"); - // CSFDO-TODO: We use CalleeName to differentiate indirect call - // We need to get sample for indirect callee too. DILocation *DIL = Inst.getDebugLoc(); if (!DIL) return nullptr; + // For indirect call, CalleeName will be empty, in which case the context + // profile for callee with largest total samples will be returned. ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName); if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); @@ -209,6 +213,26 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, return nullptr; } +std::vector<const FunctionSamples *> +SampleContextTracker::getIndirectCalleeContextSamplesFor( + const DILocation *DIL) { + std::vector<const FunctionSamples *> R; + if (!DIL) + return R; + + ContextTrieNode *CallerNode = getContextFor(DIL); + LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); + for (auto &It : CallerNode->getAllChildContext()) { + ContextTrieNode &ChildNode = It.second; + if (ChildNode.getCallSiteLoc() != CallSite) + continue; + if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples()) + R.push_back(CalleeSamples); + } + + return R; +} + FunctionSamples * SampleContextTracker::getContextSamplesFor(const DILocation *DIL) { assert(DIL && "Expect non-null location"); @@ -295,11 +319,6 @@ void SampleContextTracker::promoteMergeContextSamplesTree( const Instruction &Inst, StringRef CalleeName) { LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n" << Inst << "\n"); - // CSFDO-TODO: We also need to promote context profile from indirect - // calls. We won't have callee names from those from call instr. - if (CalleeName.empty()) - return; - // Get the caller context for the call instruction, we don't use callee // name from call because there can be context from indirect calls too. 
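// Note: throughout SampleContextTracker an empty CalleeName denotes an indirect
// callsite. Lookup then falls back to getHottestChildContext above, which scans all
// children at the callsite's LineLocation and keeps the one with the largest total
// sample count; children without samples are skipped, so a callsite with only empty
// children yields nullptr. A minimal sketch mirroring that selection (illustrative):
//
//   ContextTrieNode *Hottest = nullptr;
//   uint64_t Max = 0;
//   for (auto &It : AllChildContext)
//     if (It.second.CallSiteLoc == CallSite)
//       if (FunctionSamples *S = It.second.getFunctionSamples())
//         if (S->getTotalSamples() > Max) {
//           Max = S->getTotalSamples();
//           Hottest = &It.second;  // hottest callee context wins
//         }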
DILocation *DIL = Inst.getDebugLoc(); @@ -308,8 +327,23 @@ void SampleContextTracker::promoteMergeContextSamplesTree( return; // Get the context that needs to be promoted - LineLocation CallSite(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()); + LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); + // For indirect call, CalleeName will be empty, in which case we need to + // promote all non-inlined child context profiles. + if (CalleeName.empty()) { + for (auto &It : CallerNode->getAllChildContext()) { + ContextTrieNode *NodeToPromo = &It.second; + if (CallSite != NodeToPromo->getCallSiteLoc()) + continue; + FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples(); + if (FromSamples && FromSamples->getContext().hasState(InlinedContext)) + continue; + promoteMergeContextSamplesTree(*NodeToPromo); + } + return; + } + + // Get the context for the given callee that needs to be promoted ContextTrieNode *NodeToPromo = CallerNode->getChildContext(CallSite, CalleeName); if (!NodeToPromo) @@ -329,6 +363,8 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( LLVM_DEBUG(dbgs() << " Found context tree root to promote: " << FromSamples->getContext() << "\n"); + assert(!FromSamples->getContext().hasState(InlinedContext) && + "Shouldn't promote inlined context profile"); StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext(); return promoteMergeContextSamplesTree(NodeToPromo, RootContext, ContextStrToRemove); @@ -361,18 +397,14 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL, StringRef CalleeName) { assert(DIL && "Expect non-null location"); - // CSSPGO-TODO: need to support indirect callee - if (CalleeName.empty()) - return nullptr; - ContextTrieNode *CallContext = getContextFor(DIL); if (!CallContext) return nullptr; + // When CalleeName is empty, the child context profile with max + // total samples will be returned. 
return CallContext->getChildContext( - LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()), - CalleeName); + FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); } ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { @@ -386,8 +418,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { if (Name.empty()) Name = PrevDIL->getScope()->getSubprogram()->getName(); S.push_back( - std::make_pair(LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()), Name)); + std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); PrevDIL = DIL; } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 264ac4065e8c..b2a9127773c3 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -107,6 +108,16 @@ STATISTIC(NumCSNotInlined, STATISTIC(NumMismatchedProfile, "Number of functions with CFG mismatched profile"); STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile"); +STATISTIC(NumDuplicatedInlinesite, + "Number of inlined callsites with a partial distribution factor"); + +STATISTIC(NumCSInlinedHitMinLimit, + "Number of functions with FDO inline stopped due to min size limit"); +STATISTIC(NumCSInlinedHitMaxLimit, + "Number of functions with FDO inline stopped due to max size limit"); +STATISTIC( + NumCSInlinedHitGrowthLimit, + "Number of functions with FDO inline stopped due to growth size limit"); // Command line option to specify the file to read samples from. This is // mainly used for debugging. @@ -171,6 +182,38 @@ static cl::opt<bool> ProfileSizeInline( cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size.")); +static cl::opt<int> ProfileInlineGrowthLimit( + "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), + cl::desc("The size growth ratio limit for priority-based sample profile " + "loader inlining.")); + +static cl::opt<int> ProfileInlineLimitMin( + "sample-profile-inline-limit-min", cl::Hidden, cl::init(100), + cl::desc("The lower bound of size growth limit for " + "priority-based sample profile loader inlining.")); + +static cl::opt<int> ProfileInlineLimitMax( + "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), + cl::desc("The upper bound of size growth limit for " + "priority-based sample profile loader inlining.")); + +static cl::opt<int> ProfileICPThreshold( + "sample-profile-icp-threshold", cl::Hidden, cl::init(5), + cl::desc( + "Relative hotness threshold for indirect " + "call promotion in priority-based sample profile loader inlining.")); + +static cl::opt<int> SampleHotCallSiteThreshold( + "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), + cl::desc("Hot callsite threshold for priority-based sample profile loader " + "inlining.")); + +static cl::opt<bool> CallsitePrioritizedInline( + "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Use call site prioritized inlining for sample profile loader. "
+ "Currently only CSSPGO is supported.")); + static cl::opt<int> SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); @@ -313,6 +356,38 @@ private: DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap; }; +// Inline candidate used by iterative callsite prioritized inliner +struct InlineCandidate { + CallBase *CallInstr; + const FunctionSamples *CalleeSamples; + // Prorated callsite count, which will be used to guide inlining. For example, + // if a callsite is duplicated in LTO prelink, then in LTO postlink the two + // copies will get their own distribution factors and their prorated counts + // will be used to decide if they should be inlined independently. + uint64_t CallsiteCount; + // Call site distribution factor to prorate the profile samples for a + // duplicated callsite. Default value is 1.0. + float CallsiteDistribution; +}; + +// Inline candidate comparer using call site weight +struct CandidateComparer { + bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) { + if (LHS.CallsiteCount != RHS.CallsiteCount) + return LHS.CallsiteCount < RHS.CallsiteCount; + + // Tie breaker using GUID so we have stable/deterministic inlining order + assert(LHS.CalleeSamples && RHS.CalleeSamples && + "Expect non-null FunctionSamples"); + return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) < + RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName()); + } +}; + +using CandidateQueue = + PriorityQueue<InlineCandidate, std::vector<InlineCandidate>, + CandidateComparer>; + /// Sample profile pass. /// /// This pass reads profile data from the file specified by @@ -350,9 +425,21 @@ protected: findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineCallInstruction(CallBase &CB); + // Attempt to promote indirect call and also inline the promoted call + bool tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, + uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns, + SmallVector<CallBase *, 8> *InlinedCallSites = nullptr); bool inlineHotFunctions(Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs); + InlineCost shouldInlineCandidate(InlineCandidate &Candidate); + bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); + bool + tryInlineCandidate(InlineCandidate &Candidate, + SmallVector<CallBase *, 8> *InlinedCallSites = nullptr); + bool + inlineHotFunctionsWithPriority(Function &F, + DenseSet<GlobalValue::GUID> &InlinedGUIDs); // Inline cold/small functions in addition to hot ones bool shouldInlineColdCallee(CallBase &CallInst); void emitOptimizationRemarksForInlineCandidates( @@ -808,7 +895,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) { const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0); if (R) { - uint64_t Samples = R.get(); + uint64_t Samples = R.get() * Probe->Factor; bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples); if (FirstMark) { ORE->emit([&]() { @@ -816,13 +903,17 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) { Remark << "Applied " << ore::NV("NumSamples", Samples); Remark << " samples from profile (ProbeId="; Remark << ore::NV("ProbeId", Probe->Id); + Remark << ", Factor="; + Remark << ore::NV("Factor", 
Probe->Factor); + Remark << ", OriginalSamples="; + Remark << ore::NV("OriginalSamples", R.get()); Remark << ")"; return Remark; }); } - LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst - << " - weight: " << R.get() << ")\n"); + << " - weight: " << R.get() << " - factor: " + << format("%0.2f", Probe->Factor) << ")\n"); return Samples; } return R; @@ -918,6 +1009,31 @@ SampleProfileLoader::findIndirectCallFunctionSamples( return R; } + auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { + assert(L && R && "Expect non-null FunctionSamples"); + if (L->getEntrySamples() != R->getEntrySamples()) + return L->getEntrySamples() > R->getEntrySamples(); + return FunctionSamples::getGUID(L->getName()) < + FunctionSamples::getGUID(R->getName()); + }; + + if (ProfileIsCS) { + auto CalleeSamples = + ContextTracker->getIndirectCalleeContextSamplesFor(DIL); + if (CalleeSamples.empty()) + return R; + + // For CSSPGO, we only use the target context profile's entry count + // as that already includes both inlined callees and non-inlined ones. + Sum = 0; + for (const auto *const FS : CalleeSamples) { + Sum += FS->getEntrySamples(); + R.push_back(FS); + } + llvm::sort(R, FSCompare); + return R; + } + const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return R; @@ -935,12 +1051,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( Sum += NameFS.second.getEntrySamples(); R.push_back(&NameFS.second); } - llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) { - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); - return FunctionSamples::getGUID(L->getName()) < - FunctionSamples::getGUID(R->getName()); - }); + llvm::sort(R, FSCompare); } return R; } @@ -977,42 +1088,64 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { - if (ExternalInlineAdvisor) { - auto Advice = ExternalInlineAdvisor->getAdvice(CB); - if (!Advice->isInliningRecommended()) { - Advice->recordUnattemptedInlining(); - return false; +/// Attempt to promote indirect call and also inline the promoted call. +/// +/// \param F Caller function. +/// \param Candidate ICP and inline candidate. +/// \param SumOrigin Original sum of target counts for the indirect call. +/// \param Sum Sum of target counts for indirect call. +/// \param PromotedInsns Map to keep track of indirect call already processed. +/// \param InlinedCallSite Output vector for new call sites exposed after +/// inlining. +bool SampleProfileLoader::tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, + DenseSet<Instruction *> &PromotedInsns, + SmallVector<CallBase *, 8> *InlinedCallSite) { + const char *Reason = "Callee function not available"; + // R->getValue() != &F is to prevent promoting a recursive call. + // If it is a recursive call, we do not inline it as it could bloat + // the code exponentially. There are better ways to handle this, e.g. + // clone the caller first, and inline the cloned caller if it is + // recursive. As llvm does not inline recursive calls, we will + // simply ignore it instead of handling it explicitly.
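// Note: the proration below keeps counts consistent when an indirect callsite is
// split by promotion. With hypothetical numbers: if the callsite originally carried
// SumOrigin = 1000 samples across all targets and the promoted target accounts for
// CallsiteCount = 600, the residual indirect call keeps Sum = 400 and its distribution
// factor is scaled by 400/1000; if the promoted direct call then fails to inline, its
// own factor is scaled by 600/1000 instead. Sketch of the arithmetic (illustrative):
//
//   float Residual = OldFactor * (float)(Sum - Count) / (float)SumOrigin;
//   float Direct   = OldFactor * (float)Count / (float)SumOrigin;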
+ auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName()); + if (R != SymbolMap.end() && R->getValue() && + !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && + R->getValue()->hasFnAttribute("use-sample-profile") && + R->getValue() != &F && + isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) { + auto *DI = + &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(), + Candidate.CallsiteCount, Sum, false, ORE); + if (DI) { + Sum -= Candidate.CallsiteCount; + // Prorate the indirect callsite distribution. + // Do not update the promoted direct callsite distribution at this + // point since the original distribution combined with the callee + // profile will be used to prorate callsites from the callee if + // inlined. Once not inlined, the direct callsite distribution should + // be prorated so that the it will reflect the real callsite counts. + setProbeDistributionFactor(*Candidate.CallInstr, + Candidate.CallsiteDistribution * Sum / + SumOrigin); + PromotedInsns.insert(Candidate.CallInstr); + Candidate.CallInstr = DI; + if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) { + bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite); + if (!Inlined) { + // Prorate the direct callsite distribution so that it reflects real + // callsite counts. + setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution * + Candidate.CallsiteCount / + SumOrigin); + } + return Inlined; + } } - // Dummy record, we don't use it for replay. - Advice->recordInlining(); - } - - Function *CalledFunction = CB.getCalledFunction(); - assert(CalledFunction); - DebugLoc DLoc = CB.getDebugLoc(); - BasicBlock *BB = CB.getParent(); - InlineParams Params = getInlineParams(); - Params.ComputeFullInlineCost = true; - // Checks if there is anything in the reachable portion of the callee at - // this callsite that makes this inlining potentially illegal. Need to - // set ComputeFullInlineCost, otherwise getInlineCost may return early - // when cost exceeds threshold without checking all IRs in the callee. - // The acutal cost does not matter because we only checks isNever() to - // see if it is legal to inline the callsite. - InlineCost Cost = - getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI); - if (Cost.isNever()) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) - << "incompatible inlining"); - return false; - } - InlineFunctionInfo IFI(nullptr, GetAC); - if (InlineFunction(CB, IFI).isSuccess()) { - // The call to InlineFunction erases I, so we can't pass it here. 
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, - true, CSINLINE_DEBUG); - return true; + } else { + LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " + << Candidate.CalleeSamples->getFuncName() << " because " + << Reason << "\n"); } return false; } @@ -1078,10 +1211,11 @@ bool SampleProfileLoader::inlineHotFunctions( "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites; + DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites; bool Changed = false; - while (true) { - bool LocalChanged = false; + bool LocalChanged = true; + while (LocalChanged) { + LocalChanged = false; SmallVector<CallBase *, 10> CIS; for (auto &BB : F) { bool Hot = false; @@ -1095,7 +1229,7 @@ bool SampleProfileLoader::inlineHotFunctions( "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); if (FS->getEntrySamples() > 0 || ProfileIsCS) - localNotInlinedCallSites.try_emplace(CB, FS); + LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI)) Hot = true; else if (shouldInlineColdCallee(*CB)) @@ -1113,6 +1247,11 @@ bool SampleProfileLoader::inlineHotFunctions( } for (CallBase *I : CIS) { Function *CalledFunction = I->getCalledFunction(); + InlineCandidate Candidate = { + I, + LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I] + : nullptr, + 0 /* dummy count */, 1.0 /* dummy distribution factor */}; // Do not inline recursive calls. if (CalledFunction == &F) continue; @@ -1121,6 +1260,7 @@ bool SampleProfileLoader::inlineHotFunctions( continue; uint64_t Sum; for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { + uint64_t SumOrigin = Sum; if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); @@ -1129,65 +1269,34 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI)) continue; - const char *Reason = "Callee function not available"; - // R->getValue() != &F is to prevent promoting a recursive call. - // If it is a recursive call, we do not inline it as it could bloat - // the code exponentially. There is way to better handle this, e.g. - // clone the caller first, and inline the cloned caller if it is - // recursive. As llvm does not inline recursive calls, we will - // simply ignore it instead of handling it explicitly. - auto CalleeFunctionName = FS->getFuncName(); - auto R = SymbolMap.find(CalleeFunctionName); - if (R != SymbolMap.end() && R->getValue() && - !R->getValue()->isDeclaration() && - R->getValue()->getSubprogram() && - R->getValue()->hasFnAttribute("use-sample-profile") && - R->getValue() != &F && - isLegalToPromote(*I, R->getValue(), &Reason)) { - uint64_t C = FS->getEntrySamples(); - auto &DI = - pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE); - Sum -= C; - PromotedInsns.insert(I); - // If profile mismatches, we should not attempt to inline DI. 
- if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) && - inlineCallInstruction(cast<CallBase>(DI))) { - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined(FS); - localNotInlinedCallSites.erase(I); - LocalChanged = true; - ++NumCSInlined; - } - } else { - LLVM_DEBUG(dbgs() - << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); + Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns)) { + LocalNotInlinedCallSites.erase(I); + LocalChanged = true; } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(*I)) { - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined( - localNotInlinedCallSites[I]); - localNotInlinedCallSites.erase(I); + if (tryInlineCandidate(Candidate)) { + LocalNotInlinedCallSites.erase(I); LocalChanged = true; - ++NumCSInlined; } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { findCalleeFunctionSamples(*I)->findInlinedFunctions( InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); } } - if (LocalChanged) { - Changed = true; - } else { - break; - } + Changed |= LocalChanged; } + // For CS profile, profile for not inlined context will be merged when + // base profile is being retrieved + if (ProfileIsCS) + return Changed; + // Accumulate not inlined callsite information into notInlinedSamples - for (const auto &Pair : localNotInlinedCallSites) { + for (const auto &Pair : LocalNotInlinedCallSites) { CallBase *I = Pair.getFirst(); Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) @@ -1232,6 +1341,266 @@ bool SampleProfileLoader::inlineHotFunctions( return Changed; } +bool SampleProfileLoader::tryInlineCandidate( + InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) { + + CallBase &CB = *Candidate.CallInstr; + Function *CalledFunction = CB.getCalledFunction(); + assert(CalledFunction && "Expect a callee with definition"); + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *BB = CB.getParent(); + + InlineCost Cost = shouldInlineCandidate(Candidate); + if (Cost.isNever()) { + ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) + << "incompatible inlining"); + return false; + } + + if (!Cost) + return false; + + InlineFunctionInfo IFI(nullptr, GetAC); + if (InlineFunction(CB, IFI).isSuccess()) { + // The call to InlineFunction erases I, so we can't pass it here. + emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, + true, CSINLINE_DEBUG); + + // Now populate the list of newly exposed call sites. + if (InlinedCallSites) { + InlinedCallSites->clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites->push_back(I); + } + + if (ProfileIsCS) + ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); + ++NumCSInlined; + + // Prorate inlined probes for a duplicated inlining callsite, which probably + // has a distribution of less than 100%. Samples for an inlinee should be + // distributed among the copies of the original callsite based on each + // callsite's distribution factor for counts accuracy. Note that an inlined + // probe may come with its own distribution factor if it has been duplicated + // in the inlinee body. The two factors are multiplied to reflect the + // aggregation of duplication.
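// Note: factor aggregation is multiplicative. For example, a probe that was already
// duplicated inside the inlinee with factor 0.5, inlined through a callsite copy
// carrying 60% of the original count, ends up at 0.5 * 0.6 = 0.3, so its samples are
// scaled accordingly. Sketch (illustrative only):
//
//   float NewFactor = Probe->Factor * Candidate.CallsiteDistribution;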
+ if (Candidate.CallsiteDistribution < 1) { + for (auto &I : IFI.InlinedCallSites) { + if (Optional<PseudoProbe> Probe = extractProbe(*I)) + setProbeDistributionFactor(*I, Probe->Factor * + Candidate.CallsiteDistribution); + } + NumDuplicatedInlinesite++; + } + + return true; + } + return false; +} + +bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, + CallBase *CB) { + assert(CB && "Expect non-null call instruction"); + + if (isa<IntrinsicInst>(CB)) + return false; + + // Find the callee's profile. For an indirect call, find the hottest target profile. + const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB); + if (!CalleeSamples) + return false; + + float Factor = 1.0; + if (Optional<PseudoProbe> Probe = extractProbe(*CB)) + Factor = Probe->Factor; + + uint64_t CallsiteCount = 0; + ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent()); + if (Weight) + CallsiteCount = Weight.get(); + if (CalleeSamples) + CallsiteCount = std::max( + CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); + + *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; + return true; +} + +InlineCost +SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { + std::unique_ptr<InlineAdvice> Advice = nullptr; + if (ExternalInlineAdvisor) { + Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + return InlineCost::getNever("not previously inlined"); + } + Advice->recordInlining(); + return InlineCost::getAlways("previously inlined"); + } + + // Adjust threshold based on call site hotness; only do this for the callsite + // prioritized inliner because otherwise the cost-benefit check is done earlier. + int SampleThreshold = SampleColdCallSiteThreshold; + if (CallsitePrioritizedInline) { + if (Candidate.CallsiteCount > PSI->getHotCountThreshold()) + SampleThreshold = SampleHotCallSiteThreshold; + else if (!ProfileSizeInline) + return InlineCost::getNever("cold callsite"); + } + + Function *Callee = Candidate.CallInstr->getCalledFunction(); + assert(Callee && "Expect a definition for inline candidate of direct call"); + + InlineParams Params = getInlineParams(); + Params.ComputeFullInlineCost = true; + // Checks if there is anything in the reachable portion of the callee at + // this callsite that makes this inlining potentially illegal. Need to + // set ComputeFullInlineCost, otherwise getInlineCost may return early + // when cost exceeds threshold without checking all IRs in the callee. + // The actual cost does not matter because we only check isNever() to + // see if it is legal to inline the callsite. + InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, + GetTTI(*Callee), GetAC, GetTLI); + + // Honor always inline and never inline from call analyzer + if (Cost.isNever() || Cost.isAlways()) + return Cost; + + // For old FDO inliner, we inline the call site as long as cost is not + // "Never". The cost-benefit check is done earlier. + if (!CallsitePrioritizedInline) { + return InlineCost::get(Cost.getCost(), INT_MAX); + } + + // Otherwise only use the cost from call analyzer, but overwrite threshold with + // Sample PGO threshold.
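// Note: with the default knobs declared earlier in this patch, the effective policy
// here is: callsites hotter than the PSI hot-count threshold may grow the caller by up
// to SampleHotCallSiteThreshold (3000) cost units, other callsites fall back to
// SampleColdCallSiteThreshold (45), and cold callsites are rejected outright unless
// size inlining is enabled. Worked example (illustrative): a candidate with analyzer
// cost 120 is inlined when its callsite count is hot (120 < 3000) but refused on a
// non-hot callsite (120 > 45).
//
//   int Threshold = IsHot ? 3000 : 45;  // then require Cost.getCost() < Threshold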
+ return InlineCost::get(Cost.getCost(), SampleThreshold); +} + +bool SampleProfileLoader::inlineHotFunctionsWithPriority( + Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { + DenseSet<Instruction *> PromotedInsns; + assert(ProfileIsCS && "Priority-based inliner only works with CSSPGO now"); + + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. + assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + + // Populate the worklist with initial call sites from the root inliner, along + // with call site weights. + CandidateQueue CQueue; + InlineCandidate NewCandidate; + for (auto &BB : F) { + for (auto &I : BB.getInstList()) { + auto *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.push(NewCandidate); + } + } + + // Cap the size growth from profile guided inlining. This is needed even + // though the cost of each inline candidate already accounts for callee size, + // because with top-down inlining, we can grow the inliner size significantly + // with a large number of smaller inlinees that each pass the cost check. + assert(ProfileInlineLimitMax >= ProfileInlineLimitMin && + "Max inline size limit should not be smaller than min inline size " + "limit."); + unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit; + SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); + SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin); + if (ExternalInlineAdvisor) + SizeLimit = std::numeric_limits<unsigned>::max(); + + // Perform iterative BFS call site prioritized inlining + bool Changed = false; + while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) { + InlineCandidate Candidate = CQueue.top(); + CQueue.pop(); + CallBase *I = Candidate.CallInstr; + Function *CalledFunction = I->getCalledFunction(); + + if (CalledFunction == &F) + continue; + if (I->isIndirectCall()) { + if (PromotedInsns.count(I)) + continue; + uint64_t Sum; + auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); + uint64_t SumOrigin = Sum; + Sum *= Candidate.CallsiteDistribution; + for (const auto *FS : CalleeSamples) { + // TODO: Consider disabling pre-LTO ICP for MonoLTO as well + if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { + FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), + PSI->getOrCompHotCountThreshold()); + continue; + } + uint64_t EntryCountDistributed = + FS->getEntrySamples() * Candidate.CallsiteDistribution; + // In addition to the regular inline cost check, we also need to make sure + // ICP isn't introducing excessive speculative checks even if an individual + // target looks beneficial to promote and inline. That means we should + // only do ICP when there is a small number of dominant targets. + if (EntryCountDistributed < SumOrigin / ProfileICPThreshold) + break; + // TODO: Fix CallAnalyzer to handle all indirect calls. + // For indirect call, we don't run CallAnalyzer to get InlineCost + // before actual inlining. This is because we could see two different + // types from the same definition, which makes CallAnalyzer choke as + // it's expecting matching parameter type on both caller and callee + // side. See example from PR18962 for the triggering cases (the bug was + // fixed, but we generate different types).
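// Note: the two cutoffs around this point work on relative and absolute hotness. With
// ProfileICPThreshold = 5, a target is only promoted when its distributed entry count
// is at least SumOrigin / 5, i.e. at least 20% of the callsite's total, and it must
// also pass the PSI hot-count check below. Because candidates are visited
// hottest-first, the first failing target ends promotion for the whole callsite
// (hence break, not continue). Worked example (illustrative): SumOrigin = 1000 with
// targets 700/200/100 promotes 700 and 200, then stops at 100 (< 200).
//
//   bool Promote = Count >= SumOrigin / ProfileICPThreshold && PSI->isHotCount(Count);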
+ if (!PSI->isHotCount(EntryCountDistributed)) + break; + SmallVector<CallBase *, 8> InlinedCallSites; + // Attach function profile for promoted indirect callee, and update + // call site count for the promoted inline candidate too. + Candidate = {I, FS, EntryCountDistributed, + Candidate.CallsiteDistribution}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns, &InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); + } + Changed = true; + } + } + } else if (CalledFunction && CalledFunction->getSubprogram() && + !CalledFunction->isDeclaration()) { + SmallVector<CallBase *, 8> InlinedCallSites; + if (tryInlineCandidate(Candidate, &InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); + } + Changed = true; + } + } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { + findCalleeFunctionSamples(*I)->findInlinedFunctions( + InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); + } + } + + if (!CQueue.empty()) { + if (SizeLimit == (unsigned)ProfileInlineLimitMax) + ++NumCSInlinedHitMaxLimit; + else if (SizeLimit == (unsigned)ProfileInlineLimitMin) + ++NumCSInlinedHitMinLimit; + else + ++NumCSInlinedHitGrowthLimit; + } + + return Changed; +} + /// Find equivalence classes for the given block. /// /// This finds all the blocks that are guaranteed to execute the same @@ -1654,6 +2023,14 @@ void SampleProfileLoader::propagateWeights(Function &F) { auto T = FS->findCallTargetMapAt(CallSite); if (!T || T.get().empty()) continue; + // Prorate the callsite counts to reflect what is already done to the + // callsite, such as ICP or callsite cloning. + if (FunctionSamples::ProfileIsProbeBased) { + if (Optional<PseudoProbe> Probe = extractProbe(I)) { + if (Probe->Factor < 1) + T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); + } + } SmallVector<InstrProfValueData, 2> SortedCallTargets = GetSortedValueDataFromCallTargets(T.get()); uint64_t Sum; @@ -1833,7 +2210,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { } DenseSet<GlobalValue::GUID> InlinedGUIDs; - Changed |= inlineHotFunctions(F, InlinedGUIDs); + if (ProfileIsCS && CallsitePrioritizedInline) + Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs); + else + Changed |= inlineHotFunctions(F, InlinedGUIDs); // Compute basic block weights. Changed |= computeBlockWeights(F); @@ -1978,6 +2358,12 @@ bool SampleProfileLoader::doInitialization(Module &M, ProfileIsCS = true; FunctionSamples::ProfileIsCS = true; + // Enable priority-based inliner and size inlining by default for CSSPGO.
+ if (!ProfileSizeInline.getNumOccurrences()) + ProfileSizeInline = true; + if (!CallsitePrioritizedInline.getNumOccurrences()) + CallsitePrioritizedInline = true; + // Tracker for profiles under different context ContextTracker = std::make_unique<SampleContextTracker>(Reader->getProfiles()); diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 7cecd20b78d8..a885c3ee4ded 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -12,6 +12,7 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -25,8 +26,10 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <unordered_set> #include <vector> using namespace llvm; @@ -35,6 +38,115 @@ using namespace llvm; STATISTIC(ArtificialDbgLine, "Number of probes that have an artificial debug line"); +static cl::opt<bool> + VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden, + cl::desc("Do pseudo probe verification")); + +static cl::list<std::string> VerifyPseudoProbeFuncList( + "verify-pseudo-probe-funcs", cl::Hidden, + cl::desc("The option to specify the name of the functions to verify.")); + +static cl::opt<bool> + UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden, + cl::desc("Update pseudo probe distribution factor")); + +bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) { + // Skip function declarations. + if (F->isDeclaration()) + return false; + // Skip functions that will not be emitted into the object file. The prevailing + // definition will be verified instead. + if (F->hasAvailableExternallyLinkage()) + return false; + // Do name matching. + static std::unordered_set<std::string> VerifyFuncNames( + VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end()); + return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str()); +} + +void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) { + if (VerifyPseudoProbe) { + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &) { + this->runAfterPass(P, IR); + }); + } } + +// Callback to run after each transformation for the new pass manager.
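// Note: the verifier defined below aggregates, per probe id, the sum of distribution
// factors over the whole function after every pass, then reports probes whose sum
// drifts by more than DistributionFactorVariance between passes. The intuition:
// however many times a probe gets duplicated, the factors of its copies should still
// add up to roughly the original 1.0. Sketch of the aggregation (illustrative only):
//
//   ProbeFactorMap Sums;
//   for (const BasicBlock &BB : F)
//     for (const Instruction &I : BB)
//       if (Optional<PseudoProbe> P = extractProbe(I))
//         Sums[P->Id] += P->Factor;  // compare against last pass's Sums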
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) { + std::string Banner = + "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n"; + dbgs() << Banner; + if (any_isa<const Module *>(IR)) + runAfterPass(any_cast<const Module *>(IR)); + else if (any_isa<const Function *>(IR)) + runAfterPass(any_cast<const Function *>(IR)); + else if (any_isa<const LazyCallGraph::SCC *>(IR)) + runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR)); + else if (any_isa<const Loop *>(IR)) + runAfterPass(any_cast<const Loop *>(IR)); + else + llvm_unreachable("Unknown IR unit"); +} + +void PseudoProbeVerifier::runAfterPass(const Module *M) { + for (const Function &F : *M) + runAfterPass(&F); +} + +void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) { + for (const LazyCallGraph::Node &N : *C) + runAfterPass(&N.getFunction()); +} + +void PseudoProbeVerifier::runAfterPass(const Function *F) { + if (!shouldVerifyFunction(F)) + return; + ProbeFactorMap ProbeFactors; + for (const auto &BB : *F) + collectProbeFactors(&BB, ProbeFactors); + verifyProbeFactors(F, ProbeFactors); +} + +void PseudoProbeVerifier::runAfterPass(const Loop *L) { + const Function *F = L->getHeader()->getParent(); + runAfterPass(F); +} + +void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block, + ProbeFactorMap &ProbeFactors) { + for (const auto &I : *Block) { + if (Optional<PseudoProbe> Probe = extractProbe(I)) + ProbeFactors[Probe->Id] += Probe->Factor; + } +} + +void PseudoProbeVerifier::verifyProbeFactors( + const Function *F, const ProbeFactorMap &ProbeFactors) { + bool BannerPrinted = false; + auto &PrevProbeFactors = FunctionProbeFactors[F->getName()]; + for (const auto &I : ProbeFactors) { + float CurProbeFactor = I.second; + if (PrevProbeFactors.count(I.first)) { + float PrevProbeFactor = PrevProbeFactors[I.first]; + if (std::abs(CurProbeFactor - PrevProbeFactor) > + DistributionFactorVariance) { + if (!BannerPrinted) { + dbgs() << "Function " << F->getName() << ":\n"; + BannerPrinted = true; + } + dbgs() << "Probe " << I.first << "\tprevious factor " + << format("%0.2f", PrevProbeFactor) << "\tcurrent factor " + << format("%0.2f", CurProbeFactor) << "\n"; + } + } + + // Update + PrevProbeFactors[I.first] = I.second; + } +} + PseudoProbeManager::PseudoProbeManager(const Module &M) { if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) { for (const auto *Operand : FuncInfo->operands()) { @@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { Function *ProbeFn = llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe); Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index), - Builder.getInt32(0)}; + Builder.getInt32(0), + Builder.getInt64(PseudoProbeFullDistributionFactor)}; auto *Probe = Builder.CreateCall(ProbeFn, Args); AssignDebugLoc(Probe); } @@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { // Levarge the 32-bit discriminator field of debug data to store the ID and // type of a callsite probe. This gets rid of the dependency on plumbing a // customized metadata through the codegen pipeline. 
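// Note: the change below threads a distribution factor through the packed 32-bit
// discriminator in addition to the probe id and type, so callsite probes start out at
// full (100%) distribution. The exact bit layout is owned by
// PseudoProbeDwarfDiscriminator; conceptually it is a shift-and-or pack along these
// lines (illustrative only, not the real field widths):
//
//   uint32_t V = (Index << k1) | (Type << k2) | (Flags << k3) | (Factor << k4);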
@@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
     // Leverage the 32-bit discriminator field of debug data to store the ID
     // and type of a callsite probe. This gets rid of the dependency on
     // plumbing custom metadata through the codegen pipeline.
-    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+        Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
     if (auto DIL = Call->getDebugLoc()) {
       DIL = DIL->cloneWithDiscriminator(V);
       Call->setDebugLoc(DIL);
@@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M,

   return PreservedAnalyses::none();
 }
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto BBProfileCount = [&BFI](BasicBlock *BB) {
+    return BFI.getBlockProfileCount(BB)
+               ? BFI.getBlockProfileCount(BB).getValue()
+               : 0;
+  };
+
+  // Collect the sum of execution weights for each probe.
+  ProbeFactorMap ProbeFactors;
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I))
+        ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+    }
+  }
+
+  // Fix up over-counted probes.
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+        float Sum = ProbeFactors[Probe->Id];
+        if (Sum != 0)
+          setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+      }
+    }
+  }
+}
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+  if (UpdatePseudoProbe) {
+    for (auto &F : M) {
+      if (F.isDeclaration())
+        continue;
+      FunctionAnalysisManager &FAM =
+          AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+      runOnFunction(F, FAM);
+    }
+  }
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 18717394d384..822a786fc7c7 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1114,12 +1114,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(
     Loop *L = LI->getLoopFor(I->getParent());
     auto *DefiningAccess = MemA->getDefiningAccess();
-    // If the defining access is a MemoryPhi in the header, get the incoming
-    // value for the pre-header as defining access.
-    if (DefiningAccess->getBlock() == I->getParent()) {
+    // Get the first defining access before the loop.
+    while (L->contains(DefiningAccess->getBlock())) {
+      // If the defining access is a MemoryPhi, get the incoming
+      // value for the pre-header as defining access.
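The LoopUnswitch hunk beginning here replaces the old single-step MemoryPhi special case with a walk that climbs defining accesses until it leaves the loop; the loop body continues just below. A self-contained sketch of the complete walk, assuming MemorySSA has been computed for the function (the helper name is ours):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"

// Hypothetical helper equivalent to the new while-loop: hop from a MemoryPhi
// to its preheader incoming value, and from a MemoryDef to its defining
// access, until the access is defined outside the loop.
static llvm::MemoryAccess *firstAccessBeforeLoop(llvm::MemoryAccess *MA,
                                                 llvm::Loop *L) {
  while (L->contains(MA->getBlock())) {
    if (auto *MemPhi = llvm::dyn_cast<llvm::MemoryPhi>(MA))
      MA = MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
    else
      MA = llvm::cast<llvm::MemoryDef>(MA)->getDefiningAccess();
  }
  return MA;
}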
       if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
         DefiningAccess =
             MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+      } else {
+        DefiningAccess =
+            cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
       }
     }
     MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index f4afa3ad4623..dba5403f272a 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -170,16 +170,6 @@ static bool setRetAndArgsNoUndef(Function &F) {
   return setRetNoUndef(F) | setArgsNoUndef(F);
 }

-static bool setRetNonNull(Function &F) {
-  assert(F.getReturnType()->isPointerTy() &&
-         "nonnull applies only to pointers");
-  if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
-    return false;
-  F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
-  ++NumNonNull;
-  return true;
-}
-
 static bool setReturnedArg(Function &F, unsigned ArgNo) {
   if (F.hasParamAttribute(ArgNo, Attribute::Returned))
     return false;
@@ -1005,63 +995,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
-  case LibFunc_ZdlPvRKSt9nothrow_t: // delete(void*, nothrow)
-  case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: // delete(void*, align_val_t, nothrow)
-  case LibFunc_ZdaPvRKSt9nothrow_t: // delete[](void*, nothrow)
-  case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: // delete[](void*, align_val_t, nothrow)
-    Changed |= setDoesNotThrow(F);
-    LLVM_FALLTHROUGH;
-  case LibFunc_ZdlPv:  // delete(void*)
-  case LibFunc_ZdlPvj: // delete(void*, unsigned int)
-  case LibFunc_ZdlPvm: // delete(void*, unsigned long)
-  case LibFunc_ZdaPv:  // delete[](void*)
-  case LibFunc_ZdaPvj: // delete[](void*, unsigned int)
-  case LibFunc_ZdaPvm: // delete[](void*, unsigned long)
-  case LibFunc_ZdlPvSt11align_val_t:  // delete(void*, align_val_t)
-  case LibFunc_ZdlPvjSt11align_val_t: // delete(void*, unsigned int, align_val_t)
-  case LibFunc_ZdlPvmSt11align_val_t: // delete(void*, unsigned long, align_val_t)
-  case LibFunc_ZdaPvSt11align_val_t:  // delete[](void*, align_val_t)
-  case LibFunc_ZdaPvjSt11align_val_t: // delete[](void*, unsigned int, align_val_t)
-  case LibFunc_ZdaPvmSt11align_val_t: // delete[](void*, unsigned long, align_val_t);
-    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
-    Changed |= setArgsNoUndef(F);
-    Changed |= setWillReturn(F);
-    Changed |= setDoesNotCapture(F, 0);
-    return Changed;
-  case LibFunc_ZnwjRKSt9nothrow_t: // new(unsigned int, nothrow)
-  case LibFunc_ZnwmRKSt9nothrow_t: // new(unsigned long, nothrow)
-  case LibFunc_ZnajRKSt9nothrow_t: // new[](unsigned int, nothrow)
-  case LibFunc_ZnamRKSt9nothrow_t: // new[](unsigned long, nothrow)
-  case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: // new(unsigned int, align_val_t, nothrow)
-  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: // new(unsigned long, align_val_t, nothrow)
-  case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: // new[](unsigned int, align_val_t, nothrow)
-  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: // new[](unsigned long, align_val_t, nothrow)
-    // Nothrow operator new may return null pointer
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
-    Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
-    return Changed;
-  case LibFunc_Znwj: // new(unsigned int)
-  case LibFunc_Znwm: // new(unsigned long)
-  case LibFunc_Znaj: // new[](unsigned int)
-  case LibFunc_Znam: // new[](unsigned long)
-  case LibFunc_ZnwjSt11align_val_t: // new(unsigned int, align_val_t)
-  case LibFunc_ZnwmSt11align_val_t: // new(unsigned long, align_val_t)
-  case LibFunc_ZnajSt11align_val_t: // new[](unsigned int, align_val_t)
-  case LibFunc_ZnamSt11align_val_t: // new[](unsigned long, align_val_t)
-  case LibFunc_msvc_new_int: // new(unsigned int)
-  case LibFunc_msvc_new_longlong: // new(unsigned long long)
-  case LibFunc_msvc_new_array_int: // new[](unsigned int)
-  case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    // Operator new always returns a nonnull noalias pointer
-    Changed |= setRetNoUndef(F);
-    Changed |= setRetNonNull(F);
-    Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
-    return Changed;
   // TODO: add LibFunc entries for:
   // case LibFunc_memset_pattern4:
   // case LibFunc_memset_pattern8:
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 0ac8fa537f4e..3026342cc4a6 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -921,14 +921,20 @@ void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
     if (!I)
       continue;

+    // Only update scopes that are present in the map. If a scope is missing,
+    // we already handled that instruction before; this is faster than
+    // tracking which instructions were already updated.
     if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
-      I->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]);
+      if (MDNode *MNew = MDMap.lookup(M))
+        I->setMetadata(LLVMContext::MD_alias_scope, MNew);

     if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
-      I->setMetadata(LLVMContext::MD_noalias, MDMap[M]);
+      if (MDNode *MNew = MDMap.lookup(M))
+        I->setMetadata(LLVMContext::MD_noalias, MNew);

     if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
-      Decl->setScopeList(MDMap[Decl->getScopeList()]);
+      if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+        Decl->setScopeList(MNew);
   }
 }
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index cb5fee7d28e6..befacb591762 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -509,7 +509,7 @@ static void cloneLoopBlocks(
     SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
     SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
     ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
-    LoopInfo *LI) {
+    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   BasicBlock *PreHeader = L->getLoopPreheader();
@@ -545,6 +545,15 @@ static void cloneLoopBlocks(
     }
   }

+  {
+    // Identify what other metadata depends on the cloned version. After
+    // cloning, replace the metadata with the corrected version for both
+    // memory instructions and noalias intrinsics.
+    std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
+    cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+                               Header->getContext(), Ext);
+  }
+
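The remap() change above works because DenseMap::lookup() returns a value-initialized result (a null MDNode*) for missing keys without inserting anything, whereas operator[] would create the entry and could overwrite live metadata with null. A toy sketch of the guard pattern, with assumed pointer types standing in for MDNode*:

#include "llvm/ADT/DenseMap.h"

// Rewrite *Slot only when Old has a cloned counterpart in Map; a missing key
// yields nullptr from lookup() and leaves the original value untouched.
static void remapOne(const llvm::DenseMap<int *, int *> &Map, int *Old,
                     int **Slot) {
  if (int *New = Map.lookup(Old))
    *Slot = New;
}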
   // Recursively create the new Loop objects for nested loops, if any,
   // to preserve LoopInfo.
   for (Loop *ChildLoop : *L) {
@@ -769,13 +778,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   uint64_t ExitWeight = 0, FallThroughWeight = 0;
   initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);

+  // Identify the noalias metadata that is defined inside the loop; such
+  // metadata must be cloned for each peeled-off iteration.
+  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
   // For each peeled-off iteration, make a copy of the loop.
   for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
     SmallVector<BasicBlock *, 8> NewBlocks;
     ValueToValueMapTy VMap;

     cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
-                    LoopBlocks, VMap, LVMap, DT, LI);
+                    LoopBlocks, VMap, LVMap, DT, LI,
+                    LoopLocalNoAliasDeclScopes);

     // Remap to use values from the current iteration instead of the
     // previous one.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea0d7673edf6..47635dbdda02 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5504,11 +5504,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     return None;
   }

-  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
-
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
-    return MaxVF;
+    return computeFeasibleMaxVF(TC, UserVF);
   case CM_ScalarEpilogueNotAllowedUsePredicate:
     LLVM_FALLTHROUGH;
   case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5546,7 +5544,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
                          "scalar epilogue instead.\n");
     ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-    return MaxVF;
+    return computeFeasibleMaxVF(TC, UserVF);
   }
   return None;
 }
@@ -5563,6 +5561,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }

+  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
   assert(!MaxVF.isScalable() &&
          "Scalable vectors do not yet support tail folding");
   assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 8dc43924c067..7e53c30c7579 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -696,7 +696,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
         Remapper ? remapSamples(I->second, *Remapper, Result)
                  : FunctionSamples();
     FunctionSamples &Samples = Remapper ? Remapped : I->second;
-    StringRef FName = Samples.getName();
+    StringRef FName = Samples.getNameWithContext(true);
     MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
     if (Result != sampleprof_error::success) {
       std::error_code EC = make_error_code(Result);
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 4be0d90a45d2..978d24c8300d 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -638,13 +638,13 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
         std::max(maxArgAttrs, unsigned(intrinsic.ArgumentAttributes.size()));
     unsigned &N = UniqAttributes[&intrinsic];
     if (N)
       continue;
-    assert(AttrNum < 256 && "Too many unique attributes for table!");
     N = ++AttrNum;
+    assert(N < 65536 && "Too many unique attributes for table!");
   }

   // Emit an array of AttributeList. Most intrinsics will have at least one
   // entry, for the function itself (index ~1), which is usually nounwind.
-  OS << "  static const uint8_t IntrinsicsToAttributesMap[] = {\n";
+  OS << "  static const uint16_t IntrinsicsToAttributesMap[] = {\n";

   for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
     const CodeGenIntrinsic &intrinsic = Ints[i];
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index 3d682c690fc7..f6aee7197ee8 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -44,8 +44,8 @@
 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG
 #cmakedefine01 LIBOMP_OMPT_SUPPORT
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
-#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT
-#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT
+#cmakedefine01 LIBOMP_PROFILING_SUPPORT
+#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT
 #cmakedefine01 LIBOMP_OMPT_OPTIONAL
 #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
 #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 4a0634d59cff..a6e32bd008e1 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -32,7 +32,7 @@
 #include "ompt-specific.h"
 #endif

-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
 #include "llvm/Support/TimeProfiler.h"
 static char *ProfileTraceFile = nullptr;
 #endif
@@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
 /* ------------------------------------------------------------------------ */

 void *__kmp_launch_thread(kmp_info_t *this_thr) {
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
   ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
   // TODO: add a configuration option for time granularity
   if (ProfileTraceFile)
@@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
   KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
   KMP_MB();

-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
   llvm::timeTraceProfilerFinishThread();
 #endif

   return this_thr;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index a8522130f972..b477edbbfb42 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3355,7 +3355,8 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
       ntraits++;
     }
   }
-  omp_alloctrait_t traits[ntraits];
+  omp_alloctrait_t *traits =
+      (omp_alloctrait_t *)KMP_ALLOCA(ntraits * sizeof(omp_alloctrait_t));

 // Helper macros
 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
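The final kmp_settings.cpp hunk drops a variable-length array: VLAs are a C99 feature that C++ compilers accept only as an extension (MSVC rejects them), so the buffer is now obtained through the runtime's KMP_ALLOCA wrapper instead. A sketch of the same replacement in portable standard C++, using heap allocation as a stand-in for the alloca-style wrapper and a stub for omp_alloctrait_t:

#include <cstdlib>

struct alloctrait_stub { int key; long value; }; // stand-in for omp_alloctrait_t

static void parse_traits(std::size_t ntraits) {
  // alloctrait_stub traits[ntraits];           // VLA: not standard C++
  auto *traits = static_cast<alloctrait_stub *>(
      std::calloc(ntraits, sizeof(alloctrait_stub)));
  // ... populate and consume traits exactly as the parser does ...
  std::free(traits);
}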