author:    Dimitry Andric <dim@FreeBSD.org>  2022-02-20 14:39:23 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2022-02-20 14:39:23 +0000
commit:    c4bd2b43b293827b7ec880a10a6e491f0cc94211
tree:      acde8cf5ca883ea6e4fa6c9026bb8c6e3c14377b
parent:    3f25e997d96a3150a192777c3c389c258c5cf7ee
Vendor import of llvm-project branch release/13.x, llvmorg-13.0.1-0-g75e33f71c2da.

Refs: vendor/llvm-project/llvmorg-13.0.1-0-g75e33f71c2da, vendor/llvm-project/release-13.x
64 files changed, 549 insertions, 392 deletions
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index ba5eceda24b5..87c2f6f9f08f 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -542,6 +542,7 @@ private:
   // Visitors to walk an AST and construct the CFG.
   CFGBlock *VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc);
   CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc);
+  CFGBlock *VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc);
   CFGBlock *VisitBinaryOperator(BinaryOperator *B, AddStmtChoice asc);
   CFGBlock *VisitBreakStmt(BreakStmt *B);
   CFGBlock *VisitCallExpr(CallExpr *C, AddStmtChoice asc);
@@ -2149,6 +2150,9 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
     case Stmt::InitListExprClass:
       return VisitInitListExpr(cast<InitListExpr>(S), asc);
 
+    case Stmt::AttributedStmtClass:
+      return VisitAttributedStmt(cast<AttributedStmt>(S), asc);
+
     case Stmt::AddrLabelExprClass:
       return VisitAddrLabelExpr(cast<AddrLabelExpr>(S), asc);
 
@@ -2398,8 +2402,32 @@ CFGBlock *CFGBuilder::VisitAddrLabelExpr(AddrLabelExpr *A,
   return Block;
 }
 
-CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U,
-                                         AddStmtChoice asc) {
+static bool isFallthroughStatement(const AttributedStmt *A) {
+  bool isFallthrough = hasSpecificAttr<FallThroughAttr>(A->getAttrs());
+  assert((!isFallthrough || isa<NullStmt>(A->getSubStmt())) &&
+         "expected fallthrough not to have children");
+  return isFallthrough;
+}
+
+CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
+                                          AddStmtChoice asc) {
+  // AttributedStmts for [[likely]] can have arbitrary statements as children,
+  // and the current visitation order here would add the AttributedStmts
+  // for [[likely]] after the child nodes, which is undesirable: For example,
+  // if the child contains an unconditional return, the [[likely]] would be
+  // considered unreachable.
+  // So only add the AttributedStmt for FallThrough, which has CFG effects and
+  // also no children, and omit the others. None of the other current StmtAttrs
+  // have semantic meaning for the CFG.
+  if (isFallthroughStatement(A) && asc.alwaysAdd(*this, A)) {
+    autoCreateBlock();
+    appendStmt(Block, A);
+  }
+
+  return VisitChildren(A);
+}
+
+CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U, AddStmtChoice asc) {
   if (asc.alwaysAdd(*this, U)) {
     autoCreateBlock();
     appendStmt(Block, U);
@@ -3333,7 +3361,7 @@ CFGBlock *CFGBuilder::VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc) {
   // Save "Succ" in BackpatchBlocks. In the backpatch processing, "Succ" is
   // used to avoid adding "Succ" again.
   BackpatchBlocks.push_back(JumpSource(Succ, ScopePos));
-  return Block;
+  return VisitChildren(G);
 }
 
 CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) {
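Note: the CFG.cpp change above adds only childless `[[fallthrough]]` statements to the CFG and deliberately skips attributes such as `[[likely]]`. A minimal illustration of the two cases (my sketch, not part of the diff):

```cpp
int classify(int c) {
  switch (c) {
  case 0:
    [[fallthrough]];      // childless AttributedStmt: now appended to the CFG
                          // block, so fallthrough diagnostics keep seeing it
  case 1:
    return 1;
  default:
    [[likely]] return 2;  // AttributedStmt with a child: only the child is
                          // added, so the return is not reported unreachable
  }
}
```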
diff --git a/clang/lib/Analysis/UninitializedValues.cpp b/clang/lib/Analysis/UninitializedValues.cpp
index 67cd39728c35..811146e50b45 100644
--- a/clang/lib/Analysis/UninitializedValues.cpp
+++ b/clang/lib/Analysis/UninitializedValues.cpp
@@ -591,8 +591,8 @@ public:
     if (AtPredExit == MayUninitialized) {
       // If the predecessor's terminator is an "asm goto" that initializes
-      // the variable, then it won't be counted as "initialized" on the
-      // non-fallthrough paths.
+      // the variable, then don't count it as "initialized" on the indirect
+      // paths.
       CFGTerminator term = Pred->getTerminator();
       if (const auto *as = dyn_cast_or_null<GCCAsmStmt>(term.getStmt())) {
         const CFGBlock *fallthrough = *Pred->succ_begin();
@@ -810,13 +810,21 @@ void TransferFunctions::VisitGCCAsmStmt(GCCAsmStmt *as) {
   if (!as->isAsmGoto())
     return;
 
-  for (const Expr *o : as->outputs())
-    if (const VarDecl *VD = findVar(o).getDecl())
-      if (vals[VD] != Initialized)
-        // If the variable isn't initialized by the time we get here, then we
-        // mark it as potentially uninitialized for those cases where it's used
-        // on an indirect path, where it's not guaranteed to be defined.
-        vals[VD] = MayUninitialized;
+  ASTContext &C = ac.getASTContext();
+  for (const Expr *O : as->outputs()) {
+    const Expr *Ex = stripCasts(C, O);
+
+    // Strip away any unary operators. Invalid l-values are reported by other
+    // semantic analysis passes.
+    while (const auto *UO = dyn_cast<UnaryOperator>(Ex))
+      Ex = stripCasts(C, UO->getSubExpr());
+
+    // Mark the variable as potentially uninitialized for those cases where
+    // it's used on an indirect path, where it's not guaranteed to be
+    // defined.
+    if (const VarDecl *VD = findVar(Ex).getDecl())
+      vals[VD] = MayUninitialized;
+  }
 }
 
 void TransferFunctions::VisitObjCMessageExpr(ObjCMessageExpr *ME) {
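Note: the UninitializedValues.cpp change concerns outputs of `asm goto`, which are only guaranteed to be written on the fallthrough edge. A hedged sketch of the pattern now diagnosed, even when the output expression is wrapped in casts or unary operators (illustrative only):

```cpp
int demo() {
  int ret;
  asm goto("# may branch to %l[indirect]"
           : "=r"(ret) // written only when the asm falls through
           :
           :
           : indirect);
  return ret;   // initialized on the fallthrough path
indirect:
  return ret;   // -Wuninitialized: 'ret' may be used uninitialized
}
```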
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 3fe39ed64d9c..12df95c19f35 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -461,10 +461,8 @@ protected:
     if (this->HasFloat128)
       Builder.defineMacro("__FLOAT128__");
 
-    if (Opts.C11) {
-      Builder.defineMacro("__STDC_NO_ATOMICS__");
+    if (Opts.C11)
       Builder.defineMacro("__STDC_NO_THREADS__");
-    }
   }
 
 public:
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 59656888e25f..ecfbe284fb2e 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -243,7 +243,10 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
 
 void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
                                      MacroBuilder &Builder) const {
-  defineXLCompatMacros(Builder);
+  // We define the XLC compatibility macros only on AIX and Linux since XLC
+  // was never available on any other platforms.
+  if (getTriple().isOSAIX() || getTriple().isOSLinux())
+    defineXLCompatMacros(Builder);
 
   // Target identification.
   Builder.defineMacro("__ppc__");
diff --git a/clang/lib/Basic/Targets/Sparc.h b/clang/lib/Basic/Targets/Sparc.h
index 07844abafe11..e9f8c10db7b0 100644
--- a/clang/lib/Basic/Targets/Sparc.h
+++ b/clang/lib/Basic/Targets/Sparc.h
@@ -50,8 +50,6 @@ public:
 
   bool hasFeature(StringRef Feature) const override;
 
-  bool hasSjLjLowering() const override { return true; }
-
   ArrayRef<Builtin::Info> getTargetBuiltins() const override {
     // FIXME: Implement!
     return None;
@@ -180,7 +178,6 @@ public:
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
-  bool hasSjLjLowering() const override { return true; }
   bool hasExtIntType() const override { return true; }
 };
 
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index ed8c7e94b013..0e354a49b59a 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -191,7 +191,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
   bool success = true;
   // Enable NEON by default.
   Features.push_back("+neon");
-  llvm::StringRef WaMArch = "";
+  llvm::StringRef WaMArch;
   if (ForAS)
     for (const auto *A :
          Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler))
@@ -201,7 +201,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
   // Call getAArch64ArchFeaturesFromMarch only if "-Wa,-march=" or
   // "-Xassembler -march" is detected. Otherwise it may return false
   // and cause Clang to error out.
-  if (WaMArch.size())
+  if (!WaMArch.empty())
     success = getAArch64ArchFeaturesFromMarch(D, WaMArch, Args, Features);
   else if ((A = Args.getLastArg(options::OPT_march_EQ)))
     success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features);
@@ -222,8 +222,15 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
     success = getAArch64MicroArchFeaturesFromMcpu(
         D, getAArch64TargetCPU(Args, Triple, A), Args, Features);
 
-  if (!success)
-    D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
+  if (!success) {
+    auto Diag = D.Diag(diag::err_drv_clang_unsupported);
+    // If "-Wa,-march=" is used, 'WaMArch' will contain the argument's value,
+    // while 'A' is uninitialized. Only dereference 'A' in the other case.
+    if (!WaMArch.empty())
+      Diag << "-march=" + WaMArch.str();
+    else
+      Diag << A->getAsString(Args);
+  }
 
   if (Args.getLastArg(options::OPT_mgeneral_regs_only)) {
     Features.push_back("-fp-armv8");
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 11dc661abc24..86c9ac4aa364 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3604,6 +3604,16 @@ static bool isAllmanLambdaBrace(const FormatToken &Tok) {
           !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral));
 }
 
+// Returns the first token on the line that is not a comment.
+static const FormatToken *getFirstNonComment(const AnnotatedLine &Line) {
+  const FormatToken *Next = Line.First;
+  if (!Next)
+    return Next;
+  if (Next->is(tok::comment))
+    Next = Next->getNextNonComment();
+  return Next;
+}
+
 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
                                      const FormatToken &Right) {
   const FormatToken &Left = *Right.Previous;
@@ -3785,12 +3795,34 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
   if (Right.is(TT_InlineASMBrace))
     return Right.HasUnescapedNewline;
 
-  if (isAllmanBrace(Left) || isAllmanBrace(Right))
-    return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
-           (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
-            Style.BraceWrapping.AfterEnum) ||
-           (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
+  if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
+    auto FirstNonComment = getFirstNonComment(Line);
+    bool AccessSpecifier =
+        FirstNonComment &&
+        FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
+                                 tok::kw_private, tok::kw_protected);
+
+    if (Style.BraceWrapping.AfterEnum) {
+      if (Line.startsWith(tok::kw_enum) ||
+          Line.startsWith(tok::kw_typedef, tok::kw_enum))
+        return true;
+      // Ensure BraceWrapping for `public enum A {`.
+      if (AccessSpecifier && FirstNonComment->Next &&
+          FirstNonComment->Next->is(tok::kw_enum))
+        return true;
+    }
+
+    // Ensure BraceWrapping for `public interface A {`.
+ if (Style.BraceWrapping.AfterClass && + ((AccessSpecifier && FirstNonComment->Next && + FirstNonComment->Next->is(Keywords.kw_interface)) || + Line.startsWith(Keywords.kw_interface))) + return true; + + return (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); + } + if (Left.is(TT_ObjCBlockLBrace) && Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) return true; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 673986d16af2..8487875064aa 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2532,6 +2532,8 @@ bool UnwrappedLineParser::parseEnum() { if (FormatTok->Tok.is(tok::kw_enum)) nextToken(); + const FormatToken &InitialToken = *FormatTok; + // In TypeScript, "enum" can also be used as property name, e.g. in interface // declarations. An "enum" keyword followed by a colon would be a syntax // error and thus assume it is just an identifier. @@ -2578,7 +2580,8 @@ bool UnwrappedLineParser::parseEnum() { return true; } - if (!Style.AllowShortEnumsOnASingleLine) + if (!Style.AllowShortEnumsOnASingleLine && + ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); // Parse enum body. nextToken(); diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index a822e0aaf1f9..74136d2f5caa 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -1146,14 +1146,15 @@ WhitespaceManager::CellDescriptions WhitespaceManager::getCells(unsigned Start, } else if (C.Tok->is(tok::comma)) { if (!Cells.empty()) Cells.back().EndIndex = i; - Cell++; + if (C.Tok->getNextNonComment()->isNot(tok::r_brace)) // dangling comma + ++Cell; } } else if (Depth == 1) { if (C.Tok == MatchingParen) { if (!Cells.empty()) Cells.back().EndIndex = i; Cells.push_back(CellDescription{i, ++Cell, i + 1, false, nullptr}); - CellCount = Cell + 1; + CellCount = C.Tok->Previous->isNot(tok::comma) ? Cell + 1 : Cell; // Go to the next non-comment and ensure there is a break in front const auto *NextNonComment = C.Tok->getNextNonComment(); while (NextNonComment->is(tok::comma)) @@ -1190,6 +1191,17 @@ WhitespaceManager::CellDescriptions WhitespaceManager::getCells(unsigned Start, // So if we split a line previously and the tail line + this token is // less then the column limit we remove the split here and just put // the column start at a space past the comma + // + // FIXME This if branch covers the cases where the column is not + // the first column. This leads to weird pathologies like the formatting + // auto foo = Items{ + // Section{ + // 0, bar(), + // } + // }; + // Well if it doesn't lead to that it's indicative that the line + // breaking should be revisited. 
Unfortunately alot of other options + // interact with this auto j = i - 1; if ((j - 1) > Start && Changes[j].Tok->is(tok::comma) && Changes[j - 1].NewlinesBefore > 0) { diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h index f401964bd529..31126292755f 100644 --- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -41,6 +41,7 @@ #include <cmath> #include <cstdlib> #include <stdlib.h> +#include <string.h> #undef __CUDACC__ // Preserve common macros that will be changed below by us or by CUDA @@ -205,11 +206,6 @@ inline __host__ double __signbitd(double x) { #endif #if CUDA_VERSION >= 9000 -// CUDA-9.2 needs host-side memcpy for some host functions in -// device_functions.hpp -#if CUDA_VERSION >= 9020 -#include <string.h> -#endif #include "crt/math_functions.hpp" #else #include "math_functions.hpp" diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index f04eb9199024..4179249e91de 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -15868,7 +15868,7 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc, // promoted type and the underlying type are the same except for // signedness. Ask the AST for the correctly corresponding type and see // if that's compatible. - if (!PromoteType.isNull() && + if (!PromoteType.isNull() && !UnderlyingType->isBooleanType() && PromoteType->isUnsignedIntegerType() != UnderlyingType->isUnsignedIntegerType()) { UnderlyingType = diff --git a/compiler-rt/lib/asan/asan_malloc_linux.cpp b/compiler-rt/lib/asan/asan_malloc_linux.cpp index c6bec8551bc5..3a7d8cfacd0f 100644 --- a/compiler-rt/lib/asan/asan_malloc_linux.cpp +++ b/compiler-rt/lib/asan/asan_malloc_linux.cpp @@ -30,7 +30,7 @@ using namespace __asan; static uptr allocated_for_dlsym; static uptr last_dlsym_alloc_size_in_words; -static const uptr kDlsymAllocPoolSize = 1024; +static const uptr kDlsymAllocPoolSize = 8192; static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize]; static inline bool IsInDlsymAllocPool(const void *ptr) { diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index 760f74e927d0..dfd1d0c3cd9b 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -33,6 +33,7 @@ #include "sanitizer_common/sanitizer_stackdepot.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_linux.h" +#include "sanitizer_common/sanitizer_glibc_version.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" #include "sanitizer_common/sanitizer_vector.h" @@ -656,7 +657,8 @@ INTERCEPTOR(int, putenv, char *string) { return res; } -#if SANITIZER_FREEBSD || SANITIZER_NETBSD +#define SANITIZER_STAT_LINUX (SANITIZER_LINUX && __GLIBC_PREREQ(2, 33)) +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_STAT_LINUX INTERCEPTOR(int, fstat, int fd, void *buf) { ENSURE_MSAN_INITED(); int res = REAL(fstat)(fd, buf); @@ -664,7 +666,7 @@ INTERCEPTOR(int, fstat, int fd, void *buf) { __msan_unpoison(buf, __sanitizer::struct_stat_sz); return res; } -#define MSAN_MAYBE_INTERCEPT_FSTAT INTERCEPT_FUNCTION(fstat) +# define MSAN_MAYBE_INTERCEPT_FSTAT MSAN_INTERCEPT_FUNC(fstat) #else #define MSAN_MAYBE_INTERCEPT_FSTAT #endif @@ -677,7 +679,7 @@ INTERCEPTOR(int, __fxstat, int magic, int fd, void *buf) { __msan_unpoison(buf, __sanitizer::struct_stat_sz); return res; } -#define MSAN_MAYBE_INTERCEPT___FXSTAT INTERCEPT_FUNCTION(__fxstat) +# define 
MSAN_MAYBE_INTERCEPT___FXSTAT MSAN_INTERCEPT_FUNC(__fxstat) #else #define MSAN_MAYBE_INTERCEPT___FXSTAT #endif @@ -690,20 +692,24 @@ INTERCEPTOR(int, __fxstat64, int magic, int fd, void *buf) { __msan_unpoison(buf, __sanitizer::struct_stat64_sz); return res; } -#define MSAN_MAYBE_INTERCEPT___FXSTAT64 INTERCEPT_FUNCTION(__fxstat64) +# define MSAN_MAYBE_INTERCEPT___FXSTAT64 MSAN_INTERCEPT_FUNC(__fxstat64) #else -#define MSAN_MAYBE_INTERCEPT___FXSTAT64 +# define MSAN_MAYBE_INTERCEPT___FXSTAT64 #endif -#if SANITIZER_FREEBSD || SANITIZER_NETBSD +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_STAT_LINUX INTERCEPTOR(int, fstatat, int fd, char *pathname, void *buf, int flags) { ENSURE_MSAN_INITED(); int res = REAL(fstatat)(fd, pathname, buf, flags); if (!res) __msan_unpoison(buf, __sanitizer::struct_stat_sz); return res; } -# define MSAN_INTERCEPT_FSTATAT INTERCEPT_FUNCTION(fstatat) +# define MSAN_MAYBE_INTERCEPT_FSTATAT MSAN_INTERCEPT_FUNC(fstatat) #else +# define MSAN_MAYBE_INTERCEPT_FSTATAT +#endif + +#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD INTERCEPTOR(int, __fxstatat, int magic, int fd, char *pathname, void *buf, int flags) { ENSURE_MSAN_INITED(); @@ -711,7 +717,9 @@ INTERCEPTOR(int, __fxstatat, int magic, int fd, char *pathname, void *buf, if (!res) __msan_unpoison(buf, __sanitizer::struct_stat_sz); return res; } -# define MSAN_INTERCEPT_FSTATAT INTERCEPT_FUNCTION(__fxstatat) +# define MSAN_MAYBE_INTERCEPT___FXSTATAT MSAN_INTERCEPT_FUNC(__fxstatat) +#else +# define MSAN_MAYBE_INTERCEPT___FXSTATAT #endif #if !SANITIZER_FREEBSD && !SANITIZER_NETBSD @@ -722,9 +730,9 @@ INTERCEPTOR(int, __fxstatat64, int magic, int fd, char *pathname, void *buf, if (!res) __msan_unpoison(buf, __sanitizer::struct_stat64_sz); return res; } -#define MSAN_MAYBE_INTERCEPT___FXSTATAT64 INTERCEPT_FUNCTION(__fxstatat64) +# define MSAN_MAYBE_INTERCEPT___FXSTATAT64 MSAN_INTERCEPT_FUNC(__fxstatat64) #else -#define MSAN_MAYBE_INTERCEPT___FXSTATAT64 +# define MSAN_MAYBE_INTERCEPT___FXSTATAT64 #endif INTERCEPTOR(int, pipe, int pipefd[2]) { @@ -1686,7 +1694,8 @@ void InitializeInterceptors() { MSAN_MAYBE_INTERCEPT_FCVT; MSAN_MAYBE_INTERCEPT_FSTAT; MSAN_MAYBE_INTERCEPT___FXSTAT; - MSAN_INTERCEPT_FSTATAT; + MSAN_MAYBE_INTERCEPT_FSTATAT; + MSAN_MAYBE_INTERCEPT___FXSTATAT; MSAN_MAYBE_INTERCEPT___FXSTAT64; MSAN_MAYBE_INTERCEPT___FXSTATAT64; INTERCEPT_FUNCTION(pipe); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 5b710c23fd00..b0e01930a379 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -457,10 +457,13 @@ #define SANITIZER_INTERCEPT_SEND_SENDTO SI_POSIX #define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX -#define SANITIZER_INTERCEPT_STAT \ - (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS) -#define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD) -#define SANITIZER_INTERCEPT___XSTAT (!SANITIZER_INTERCEPT_STAT && SI_POSIX) +#define SI_STAT_LINUX (SI_LINUX && __GLIBC_PREREQ(2, 33)) +#define SANITIZER_INTERCEPT_STAT \ + (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS || \ + SI_STAT_LINUX) +#define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD || SI_STAT_LINUX) +#define SANITIZER_INTERCEPT___XSTAT \ + ((!SANITIZER_INTERCEPT_STAT && SI_POSIX) || SI_STAT_LINUX) #define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT___LXSTAT 
SANITIZER_INTERCEPT___XSTAT #define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support index 4d867167c2b1..2242a6908529 100644 --- a/libcxx/include/__threading_support +++ b/libcxx/include/__threading_support @@ -29,16 +29,9 @@ # include <__external_threading> #elif !defined(_LIBCPP_HAS_NO_THREADS) -#if defined(__APPLE__) || defined(__MVS__) -# define _LIBCPP_NO_NATIVE_SEMAPHORES -#endif - #if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) # include <pthread.h> # include <sched.h> -# ifndef _LIBCPP_NO_NATIVE_SEMAPHORES -# include <semaphore.h> -# endif #elif defined(_LIBCPP_HAS_THREAD_API_C11) # include <threads.h> #endif @@ -78,12 +71,6 @@ typedef pthread_mutex_t __libcpp_recursive_mutex_t; typedef pthread_cond_t __libcpp_condvar_t; #define _LIBCPP_CONDVAR_INITIALIZER PTHREAD_COND_INITIALIZER -#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES -// Semaphore -typedef sem_t __libcpp_semaphore_t; -# define _LIBCPP_SEMAPHORE_MAX SEM_VALUE_MAX -#endif - // Execute once typedef pthread_once_t __libcpp_exec_once_flag; #define _LIBCPP_EXEC_ONCE_INITIALIZER PTHREAD_ONCE_INIT @@ -149,12 +136,6 @@ typedef void* __libcpp_recursive_mutex_t[5]; typedef void* __libcpp_condvar_t; #define _LIBCPP_CONDVAR_INITIALIZER 0 -// Semaphore -typedef void* __libcpp_semaphore_t; -#if defined(_LIBCPP_HAS_THREAD_API_WIN32) -# define _LIBCPP_SEMAPHORE_MAX (::std::numeric_limits<long>::max()) -#endif - // Execute Once typedef void* __libcpp_exec_once_flag; #define _LIBCPP_EXEC_ONCE_INITIALIZER 0 @@ -219,26 +200,6 @@ int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m, _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv); -#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES - -// Semaphore -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns); - -#endif // _LIBCPP_NO_NATIVE_SEMAPHORES - // Execute once _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_execute_once(__libcpp_exec_once_flag *flag, @@ -452,38 +413,6 @@ int __libcpp_condvar_destroy(__libcpp_condvar_t *__cv) return pthread_cond_destroy(__cv); } -#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES - -// Semaphore -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init) -{ - return sem_init(__sem, 0, __init) == 0; -} - -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem) -{ - return sem_destroy(__sem) == 0; -} - -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem) -{ - return sem_post(__sem) == 0; -} - -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem) -{ - return sem_wait(__sem) == 0; -} - -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns) -{ - auto const __abs_time = chrono::system_clock::now().time_since_epoch() + __ns; - __libcpp_timespec_t __ts = __thread_detail::__convert_to_timespec(__abs_time); - return sem_timedwait(__sem, &__ts) == 0; -} - -#endif //_LIBCPP_NO_NATIVE_SEMAPHORES - // Execute once int __libcpp_execute_once(__libcpp_exec_once_flag *flag, void (*init_routine)()) { diff --git a/libcxx/include/semaphore 
b/libcxx/include/semaphore index 906f62e0f07a..db03fb967ed1 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -67,10 +67,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD /* -__atomic_semaphore_base is the general-case implementation, to be used for -user-requested least-max values that exceed the OS implementation support -(incl. when the OS has no support of its own) and for binary semaphores. - +__atomic_semaphore_base is the general-case implementation. It is a typical Dijkstra semaphore algorithm over atomics, wait and notify functions. It avoids contention against users' own use of those facilities. @@ -82,7 +79,7 @@ class __atomic_semaphore_base public: _LIBCPP_INLINE_VISIBILITY - __atomic_semaphore_base(ptrdiff_t __count) : __a(__count) + constexpr explicit __atomic_semaphore_base(ptrdiff_t __count) : __a(__count) { } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY @@ -108,81 +105,30 @@ public: _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY bool try_acquire_for(chrono::duration<Rep, Period> const& __rel_time) { - auto const __test_fn = [this]() -> bool { - auto __old = __a.load(memory_order_acquire); - while(1) { - if (__old == 0) - return false; - if(__a.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed)) - return true; - } - }; + if (__rel_time == chrono::duration<Rep, Period>::zero()) + return try_acquire(); + auto const __test_fn = [this]() { return try_acquire(); }; return __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy(), __rel_time); } -}; - -#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES - -/* - -__platform_semaphore_base a simple wrapper for the OS semaphore type. That -is, every call is routed to the OS in the most direct manner possible. - -*/ - -class __platform_semaphore_base -{ - __libcpp_semaphore_t __semaphore; - -public: - _LIBCPP_INLINE_VISIBILITY - __platform_semaphore_base(ptrdiff_t __count) : - __semaphore() - { - __libcpp_semaphore_init(&__semaphore, __count); - } - _LIBCPP_INLINE_VISIBILITY - ~__platform_semaphore_base() { - __libcpp_semaphore_destroy(&__semaphore); - } - _LIBCPP_INLINE_VISIBILITY - void release(ptrdiff_t __update) - { - for(; __update; --__update) - __libcpp_semaphore_post(&__semaphore); - } - _LIBCPP_INLINE_VISIBILITY - void acquire() - { - __libcpp_semaphore_wait(&__semaphore); - } - _LIBCPP_INLINE_VISIBILITY - bool try_acquire_for(chrono::nanoseconds __rel_time) + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY + bool try_acquire() { - return __libcpp_semaphore_wait_timed(&__semaphore, __rel_time); + auto __old = __a.load(memory_order_acquire); + while (true) { + if (__old == 0) + return false; + if (__a.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed)) + return true; + } } }; -template<ptrdiff_t __least_max_value> -using __semaphore_base = - typename conditional<(__least_max_value > 1 && __least_max_value <= _LIBCPP_SEMAPHORE_MAX), - __platform_semaphore_base, - __atomic_semaphore_base>::type; - -#else - -template<ptrdiff_t __least_max_value> -using __semaphore_base = - __atomic_semaphore_base; - #define _LIBCPP_SEMAPHORE_MAX (numeric_limits<ptrdiff_t>::max()) -#endif //_LIBCPP_NO_NATIVE_SEMAPHORES - template<ptrdiff_t __least_max_value = _LIBCPP_SEMAPHORE_MAX> class counting_semaphore { - __semaphore_base<__least_max_value> __semaphore; + __atomic_semaphore_base __semaphore; public: static constexpr ptrdiff_t max() noexcept { @@ -190,7 +136,7 @@ public: } _LIBCPP_INLINE_VISIBILITY - counting_semaphore(ptrdiff_t __count = 0) : 
__semaphore(__count) { } + constexpr explicit counting_semaphore(ptrdiff_t __count) : __semaphore(__count) { } ~counting_semaphore() = default; counting_semaphore(const counting_semaphore&) = delete; @@ -215,14 +161,14 @@ public: _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY bool try_acquire() { - return try_acquire_for(chrono::nanoseconds::zero()); + return __semaphore.try_acquire(); } template <class Clock, class Duration> _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY bool try_acquire_until(chrono::time_point<Clock, Duration> const& __abs_time) { auto const current = Clock::now(); - if(current >= __abs_time) + if (current >= __abs_time) return try_acquire(); else return try_acquire_for(__abs_time - current); diff --git a/libcxx/include/string b/libcxx/include/string index 4159ea580345..23dd43792fc6 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -3345,6 +3345,7 @@ basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacit } template <class _CharT, class _Traits, class _Allocator> +inline void basic_string<_CharT, _Traits, _Allocator>::shrink_to_fit() _NOEXCEPT { @@ -3355,6 +3356,7 @@ basic_string<_CharT, _Traits, _Allocator>::shrink_to_fit() _NOEXCEPT } template <class _CharT, class _Traits, class _Allocator> +inline void basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target_capacity) { diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index aaecef6ee94f..d9334d5bf853 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -20,6 +20,9 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; +// Undefine the macro predefined by GCC powerpc32. +#undef PPC + namespace { class PPC final : public TargetInfo { public: diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index d5b9efbe18fc..ab65571887d1 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1567,7 +1567,7 @@ template <class ELFT> void SharedFile::parse() { Symbol *s = symtab->addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); s->exportDynamic = true; - if (s->isUndefined() && !s->isWeak() && + if (s->isUndefined() && sym.getBinding() != STB_WEAK && config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) requiredSymbols.push_back(s); continue; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 537859f9e0b5..71249188afe3 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -927,6 +927,12 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef, msg += "\n>>> the vtable symbol may be undefined because the class is missing " "its key function (see https://lld.llvm.org/missingkeyfunction)"; + if (config->gcSections && config->zStartStopGC && + sym.getName().startswith("__start_")) { + msg += "\n>>> the encapsulation symbol needs to be retained under " + "--gc-sections properly; consider -z nostart-stop-gc " + "(see https://lld.llvm.org/ELF/start-stop-gc)"; + } if (undef.isWarning) warn(msg); @@ -1403,8 +1409,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // The 4 types that relative GOTPLT are all x86 and x86-64 specific. 
   if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
     in.gotPlt->hasGotPltOffRel = true;
-  } else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC64_TOCBASE, R_PPC64_RELAX_TOC>(
-                 expr)) {
+  } else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC32_PLTREL, R_PPC64_TOCBASE,
+                   R_PPC64_RELAX_TOC>(expr)) {
     in.got->hasGotOffRel = true;
   }
 
diff --git a/lld/docs/ELF/start-stop-gc.rst b/lld/docs/ELF/start-stop-gc.rst
new file mode 100644
index 000000000000..18ccc26defc1
--- /dev/null
+++ b/lld/docs/ELF/start-stop-gc.rst
@@ -0,0 +1,66 @@
+-z start-stop-gc
+================
+
+If your ``-Wl,--gc-sections`` build fails with a linker error like this:
+
+  error: undefined symbol: __start_meta
+  >>> referenced by {{.*}}
+  >>> the encapsulation symbol needs to be retained under --gc-sections properly; consider -z nostart-stop-gc (see https://lld.llvm.org/ELF/start-stop-gc)
+
+it is likely your C identifier name sections are not properly annotated to
+survive ``--gc-sections``.
+
+``__start_meta`` and ``__stop_meta`` are sometimes called encapsulation
+symbols. In October 2015, GNU ld switched behavior and made a ``__start_meta``
+reference from a live section retain all ``meta`` input sections. This
+conservative behavior works for existing code which does not take GC into fair
+consideration, but unnecessarily increases sizes for modern metadata section
+usage which desires precise GC.
+
+GNU ld 2.37 added ``-z start-stop-gc`` to restore the traditional behavior.
+ld.lld 13.0.0 defaults to ``-z start-stop-gc`` and supports
+``-z nostart-stop-gc`` to switch to the conservative behavior.
+
+The Apple ld64 linker has a similar ``section$start`` feature and always
+allowed GC (like ``-z start-stop-gc``).
+
+Annotate C identifier name sections
+-----------------------------------
+
+A C identifier name section (``meta``) sometimes depends on another section.
+Let that section reference ``meta`` via a relocation.
+
+.. code-block:: c
+
+  asm(".pushsection .init_array,\"aw\",%init_array\n" \
+      ".reloc ., BFD_RELOC_NONE, meta\n" \
+      ".popsection\n")
+
+If a relocation is inconvenient, consider using ``__attribute__((retain))``
+(GCC 11 with modern binutils, Clang 13).
+
+.. code-block:: c
+
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wattributes"
+  __attribute__((retain,used,section("meta")))
+  static const char dummy[0];
+  #pragma GCC diagnostic pop
+
+GCC before 11 and Clang before 13 do not recognize ``__attribute__((retain))``,
+so ``-Wattributes`` may need to be ignored. On ELF targets,
+``__attribute__((used))`` prevents the compiler from discarding a symbol, but
+does not affect the linker's ``--gc-sections``.
+
+In a macro, you may use:
+
+.. code-block:: c
+
+  _Pragma("GCC diagnostic push")
+  _Pragma("GCC diagnostic ignored \"-Wattributes\"")
+  ...
+  _Pragma("GCC diagnostic pop")
+
+If you use the ``SECTIONS`` command in a linker script, use
+`the KEEP keyword <https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html>`_,
+e.g. ``meta : { KEEP(*(meta)) }``
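Note: a minimal sketch of the encapsulation symbols the new document describes, using its ``meta`` section name (illustrative only, not part of the diff):

```cpp
struct MetaRecord { int id; };

// One record contributed to the C identifier name section "meta"; see the
// document above for the annotations needed to survive --gc-sections.
__attribute__((used, section("meta")))
static const MetaRecord R1 = {1};

// The linker defines these bounds for C identifier name sections.
extern const MetaRecord __start_meta[], __stop_meta[];

int sumIds() {
  int Sum = 0;
  for (const MetaRecord *R = __start_meta; R != __stop_meta; ++R)
    Sum += R->id; // visits every "meta" record the linker kept
  return Sum;
}
```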
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 50af6e7d7939..238bf5a9f695 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -5,11 +5,6 @@ lld 13.0.0 Release Notes
 .. contents::
    :local:
 
-.. warning::
-   These are in-progress notes for the upcoming LLVM 13.0.0 release.
-   Release notes for previous releases can be found on
-   `the Download Page <https://releases.llvm.org/download.html>`_.
-
 Introduction
 ============
 
diff --git a/lld/docs/index.rst b/lld/docs/index.rst
index 40da6d77cca8..b0080f54df24 100644
--- a/lld/docs/index.rst
+++ b/lld/docs/index.rst
@@ -178,4 +178,5 @@ document soon.
    Partitions
    ReleaseNotes
    ELF/linker_script
+   ELF/start-stop-gc
    ELF/warn_backrefs
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index 81500905c0f5..148be34aa73b 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -1098,28 +1098,10 @@ public:
         continue;
       }
 
-      // The blockaddress constant expression is a weird special case, we can't
-      // generically walk its operands the way we do for all other constants.
-      if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
-        // If we've already visited the function referred to by the block
-        // address, we don't need to revisit it.
-        if (Visited.count(BA->getFunction()))
-          continue;
-
-        // If all of the blockaddress' users are instructions within the
-        // referred to function, we don't need to insert a cycle.
-        if (llvm::all_of(BA->users(), [&](User *U) {
-              if (Instruction *I = dyn_cast<Instruction>(U))
-                return I->getFunction() == BA->getFunction();
-              return false;
-            }))
-          continue;
-
-        // Otherwise we should go visit the referred to function.
-        Visited.insert(BA->getFunction());
-        Worklist.push_back(BA->getFunction());
+      // blockaddresses are weird and don't participate in the call graph
+      // anyway; skip them.
+      if (isa<BlockAddress>(C))
         continue;
-      }
 
       for (Value *Op : C->operand_values())
         if (Visited.insert(cast<Constant>(Op)).second)
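Note: the LazyCallGraph.h hunk relies on `blockaddress` constants not contributing call-graph edges. In source form they typically come from the GNU labels-as-values extension (illustrative sketch):

```cpp
int dispatch(int i) {
  // '&&a' and '&&b' lower to blockaddress constants; visiting them no longer
  // pushes the containing function onto the reference worklist.
  static void *const Targets[] = {&&a, &&b};
  goto *Targets[i & 1];
a:
  return 1;
b:
  return 2;
}
```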
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 05d0591f1e5d..35c33153913e 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1922,9 +1922,7 @@ public:
   /// Optional target hook that returns true if \p MBB is safe to outline from,
   /// and returns any target-specific information in \p Flags.
   virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
-                                      unsigned &Flags) const {
-    return true;
-  }
+                                      unsigned &Flags) const;
 
   /// Insert a custom frame for outlined functions.
   virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index c5840564454e..17a9c3a77f4e 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -897,6 +897,7 @@ struct TempMDNodeDeleter {
 class MDNode : public Metadata {
   friend class ReplaceableMetadataImpl;
   friend class LLVMContextImpl;
+  friend class DIArgList;
 
   unsigned NumOperands;
   unsigned NumUnresolved;
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 28d171d45256..5c3b26d5754c 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -40,7 +40,10 @@ class Latch {
 
 public:
   explicit Latch(uint32_t Count = 0) : Count(Count) {}
-  ~Latch() { sync(); }
+  ~Latch() {
+    // At least ensure that sync() was called.
+    assert(Count == 0);
+  }
 
   void inc() {
     std::lock_guard<std::mutex> lock(Mutex);
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index e7445e225d52..1da712eb9d26 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -697,14 +697,16 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I,
   case Instruction::AtomicRMW:
     return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
   case Instruction::Call:
-    return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+  case Instruction::CallBr:
   case Instruction::Invoke:
-    return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+    return getModRefInfo((const CallBase *)I, Loc, AAQIP);
   case Instruction::CatchPad:
     return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
   case Instruction::CatchRet:
     return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
   default:
+    assert(!I->mayReadOrWriteMemory() &&
+           "Unhandled memory access instruction!");
     return ModRefInfo::NoModRef;
   }
 }
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 4c2413e14435..e8f79a28a8e8 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -354,6 +354,7 @@ protected:
   bool simplifyCallSite(Function *F, CallBase &Call);
   template <typename Callable>
   bool simplifyInstruction(Instruction &I, Callable Evaluate);
+  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 
   /// Return true if the given argument to the function being considered for
@@ -1471,6 +1472,27 @@ bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
   return true;
 }
 
+/// Try to simplify a call to llvm.is.constant.
+///
+/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
+/// we expect calls of this specific intrinsic to be infrequent.
+///
+/// FIXME: Given that we know CB's parent (F) caller
+/// (CandidateCall->getParent()->getParent()), we might be able to determine
+/// whether inlining F into F's caller would change how the call to
+/// llvm.is.constant would evaluate.
+bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
+  Value *Arg = CB.getArgOperand(0);
+  auto *C = dyn_cast<Constant>(Arg);
+
+  if (!C)
+    C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
+
+  Type *RT = CB.getFunctionType()->getReturnType();
+  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
+  return true;
+}
+
 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
   // Propagate constants through bitcasts.
   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
@@ -2091,6 +2113,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
       if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
         SROAArgValues[II] = SROAArg;
       return true;
+    case Intrinsic::is_constant:
+      return simplifyIntrinsicCallIsConstant(Call);
     }
   }
 
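Note: `llvm.is.constant` is the intrinsic Clang emits for `__builtin_constant_p`, so the InlineCost.cpp hunk lets the analyzer fold checks like this while costing a candidate (a sketch; `slowPath` is hypothetical):

```cpp
int slowPath(int V);

static inline int encode(int V) {
  if (__builtin_constant_p(V)) // foldable once 'encode(7)' is being inlined
    return V << 1;
  return slowPath(V);
}

int use() { return encode(7); }
```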
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 5ca1e91cc5f4..fde7b942665d 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -318,6 +318,11 @@ public:
     return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
   }
 
+  bool doFinalization(Module &M) override {
+    RewindFunction = nullptr;
+    return false;
+  }
+
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetPassConfig>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 707161d5a8b0..68920e2e50df 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseSet.h"
@@ -432,7 +433,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
   // every definition of it, meaning we can switch all the DBG_VALUEs over
   // to just reference the stack slot.
   SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg];
-  SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>>
+  SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2>
       SpilledOperandsMap;
   for (MachineOperand *MO : LRIDbgOperands)
     SpilledOperandsMap[MO->getParent()].push_back(MO);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2e4a656ea0c8..4bbb5beb21f3 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1417,3 +1417,16 @@ std::string TargetInstrInfo::createMIROperandComment(
 }
 
 TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+
+bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
+                                             unsigned &Flags) const {
+  // Some instrumentations create special TargetOpcodes at the start which
+  // expand to special code sequences that must be present.
+  auto First = MBB.getFirstNonDebugInstr();
+  if (First != MBB.end() &&
+      (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
+       First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER))
+    return false;
+
+  return true;
+}
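Note: the new default `TargetInstrInfo::isMBBSafeToOutlineFrom` protects instrumentation prologues. One way such a `PATCHABLE_FUNCTION_ENTER` pseudo arises (illustrative only):

```cpp
// Reserves two NOPs at the function entry for live patching; the entry block
// must keep this sequence, so the outliner now leaves it alone.
__attribute__((patchable_function_entry(2)))
void traced() {}
```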
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 7b0dab799e1a..2180eedb58f7 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -1592,6 +1592,12 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
   assert((!New || isa<ValueAsMetadata>(New)) &&
          "DIArgList must be passed a ValueAsMetadata");
   untrack();
+  bool Uniq = isUniqued();
+  if (Uniq) {
+    // We need to update the uniqueness once the Args are updated since they
+    // form the key to the DIArgLists store.
+    eraseFromStore();
+  }
   ValueAsMetadata *NewVM = cast_or_null<ValueAsMetadata>(New);
   for (ValueAsMetadata *&VM : Args) {
     if (&VM == OldVMPtr) {
@@ -1601,6 +1607,10 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
       VM = ValueAsMetadata::get(UndefValue::get(VM->getValue()->getType()));
     }
   }
+  if (Uniq) {
+    if (uniquify() != this)
+      storeDistinctInContext();
+  }
   track();
 }
 
 void DIArgList::track() {
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 99819602c545..85ac63eaa1aa 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -55,8 +55,15 @@ LLVMContextImpl::~LLVMContextImpl() {
 
   // Drop references for MDNodes.  Do this before Values get deleted to avoid
   // unnecessary RAUW when nodes are still unresolved.
-  for (auto *I : DistinctMDNodes)
+  for (auto *I : DistinctMDNodes) {
+    // We may have DIArgList that were uniqued, and as it has a custom
+    // implementation of dropAllReferences, it needs to be explicitly invoked.
+    if (auto *AL = dyn_cast<DIArgList>(I)) {
+      AL->dropAllReferences();
+      continue;
+    }
     I->dropAllReferences();
+  }
 #define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS)                                    \
   for (auto *I : CLASS##s)                                                     \
     I->dropAllReferences();
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 2ae23fdc95a8..655319eb1c99 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -391,8 +391,9 @@ template <> struct MDNodeKeyImpl<DIEnumerator> {
         IsUnsigned(N->isUnsigned()) {}
 
   bool isKeyOf(const DIEnumerator *RHS) const {
-    return APInt::isSameValue(Value, RHS->getValue()) &&
-           IsUnsigned == RHS->isUnsigned() && Name == RHS->getRawName();
+    return Value.getBitWidth() == RHS->getValue().getBitWidth() &&
+           Value == RHS->getValue() && IsUnsigned == RHS->isUnsigned() &&
+           Name == RHS->getRawName();
   }
 
   unsigned getHashValue() const { return hash_combine(Value, Name); }
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 70d69fc8dd32..6a9a174a1b6a 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -502,6 +502,23 @@ static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
   return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
 }
 
+static bool allowSectionTypeMismatch(const Triple &TT, StringRef SectionName,
+                                     unsigned Type) {
+  if (TT.getArch() == Triple::x86_64) {
+    // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for
+    // .eh_frame, but GNU as emits SHT_PROGBITS .eh_frame for .cfi_*
+    // directives. Don't error for SHT_PROGBITS .eh_frame.
+    return SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS;
+  }
+  if (TT.isMIPS()) {
+    // MIPS .debug_* sections should have SHT_MIPS_DWARF section type to
+    // distinguish among sections containing DWARF and ECOFF debug formats,
+    // but in assembly files these sections have SHT_PROGBITS type.
+    return hasPrefix(SectionName, ".debug_") && Type == ELF::SHT_PROGBITS;
+  }
+  return false;
+}
+
 bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
   StringRef SectionName;
 
@@ -659,16 +676,14 @@ EndStmt:
       getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
                                  IsComdat, UniqueID, LinkedToSym);
   getStreamer().SwitchSection(Section, Subsection);
-  // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame,
-  // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't error
-  // for SHT_PROGBITS .eh_frame
-  if (Section->getType() != Type &&
-      !(SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS))
-    Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
-               utohexstr(Section->getType()));
   // Check that flags are used consistently. However, the GNU assembler permits
   // to leave out in subsequent uses of the same sections; for compatibility,
   // do likewise.
+  if (!TypeName.empty() && Section->getType() != Type &&
+      !allowSectionTypeMismatch(getContext().getTargetTriple(), SectionName,
+                                Type))
+    Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
+               utohexstr(Section->getType()));
   if ((extraFlags || Size || !TypeName.empty()) && Section->getFlags() != Flags)
     Error(loc, "changed section flags for " + SectionName + ", expected: 0x" +
                utohexstr(Section->getFlags()));
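Note: a sketch of the kind of input the relaxed check is meant to accept on x86-64, where the integrated assembler gives `.eh_frame` the canonical `SHT_X86_64_UNWIND` type while GNU-style input says `@progbits` (illustrative, assuming file-scope inline asm):

```cpp
asm(".section .eh_frame,\"a\",@progbits\n" // no longer a changed-type error
    ".previous\n");
```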
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 9a2e1003da5a..71e3a1362f7e 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -151,7 +151,12 @@ static std::atomic<int> TaskGroupInstances;
 // lock, only allow the first TaskGroup to run tasks in parallel. In the
 // scenario of nested parallel_for_each(), only the outermost one runs in
 // parallel.
 TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
-TaskGroup::~TaskGroup() { --TaskGroupInstances; }
+TaskGroup::~TaskGroup() {
+  // We must ensure that all the workloads have finished before decrementing
+  // the instances count.
+  L.sync();
+  --TaskGroupInstances;
+}
 
 void TaskGroup::spawn(std::function<void()> F) {
   if (Parallel) {
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index be88e7db1400..b83477e0e4cc 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -29,14 +29,6 @@
 #include <zircon/syscalls.h>
 #endif
 
-#if defined(__mips__)
-#  if defined(__OpenBSD__)
-#    include <mips64/sysarch.h>
-#  elif !defined(__FreeBSD__)
-#    include <sys/cachectl.h>
-#  endif
-#endif
-
 #if defined(__APPLE__)
 extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
 #else
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b2eee2845ba9..5bbf4f97c54a 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1049,6 +1049,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case AArch64::MOVaddrEXT: {
     // Expand into ADRP + ADD.
     Register DstReg = MI.getOperand(0).getReg();
+    assert(DstReg != AArch64::XZR);
     MachineInstrBuilder MIB1 =
         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
             .add(MI.getOperand(1));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 091a62aa4ada..f29bb83c2d2e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6923,6 +6923,8 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
 
 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                               unsigned &Flags) const {
+  if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags))
+    return false;
   // Check if LR is available through all of the MBB. If it's not, then set
   // a flag.
assert(MBB.getParent()->getRegInfo().tracksLiveness() && diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 12744e4de09b..f3da6bf057c2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -673,40 +673,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in { // removed, along with the AArch64Wrapper node. let AddedComplexity = 10 in -def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), - [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, +def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), + [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, Sched<[WriteLDAdr]>; // The MOVaddr instruction should match only when the add is not folded // into a load or store address. def MOVaddr - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), + : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), tglobaladdr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrJT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), + : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), tjumptable:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrCP - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), + : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), tconstpool:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrBA - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), + : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), tblockaddress:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrTLS - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), + : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), tglobaltlsaddr:$low))]>, Sched<[WriteAdrAdr]>; def MOVaddrEXT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), + : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), texternalsym:$low))]>, Sched<[WriteAdrAdr]>; // Normally AArch64addlow either gets folded into a following ldr/str, @@ -714,8 +714,8 @@ def MOVaddrEXT // might appear without either of them, so allow lowering it into a plain // add. 
def ADDlowTLS - : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low), - [(set GPR64:$dst, (AArch64addlow GPR64:$src, + : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low), + [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src, tglobaltlsaddr:$low))]>, Sched<[WriteAdr]>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 8c34027f7bb3..94a0ce09afed 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -13,6 +13,8 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" +#include "AArch64RegisterInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" @@ -271,6 +273,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, case AArch64::WSeqPairsClassRegClassID: case AArch64::XSeqPairsClassRegClassID: case AArch64::MatrixIndexGPR32_12_15RegClassID: + case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID: return getRegBank(AArch64::GPRRegBankID); case AArch64::CCRRegClassID: return getRegBank(AArch64::CCRegBankID); diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 493c1ad87f93..d6dd807afbce 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -2048,7 +2048,7 @@ SILoadStoreOptimizer::collectMergeableInsts( // adjacent to each other in the list, which will make it easier to find // matches. MergeList.sort( - [] (const CombineInfo &A, CombineInfo &B) { + [] (const CombineInfo &A, const CombineInfo &B) { return A.Offset < B.Offset; }); ++I; diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index aff7ec8d2ed6..256a95b94f6c 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -525,7 +525,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo & MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP) .addImm(ArgAssigner.StackOffset) - .addImm(0) + .addImm(-1ULL) .add(predOps(ARMCC::AL)); return true; diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 28a076edd6dc..9224c2221f4d 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -2022,7 +2022,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs, unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) - .addImm(NumBytes).addImm(0)); + .addImm(NumBytes).addImm(-1ULL)); // Now the return value. if (RetVT != MVT::isVoid) { diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9c7055deaaf8..7c238a1099d8 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -79,6 +79,10 @@ public: void Select(SDNode *N) override; + /// Return true as some complex patterns, like those that call + /// canExtractShiftFromMul can modify the DAG inplace. 
+ bool ComplexPatternFuncMutatesDAG() const override { return true; } + bool hasNoVMLxHazardUse(SDNode *N) const; bool isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index ef07b2839bc9..4c9b8b5fbfa9 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1520,6 +1520,7 @@ def tTBH_JT : tPseudoInst<(outs), let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br, [(set R0, ARMthread_pointer)]>, + Requires<[IsThumb, IsReadTPSoft]>, Sched<[WriteBr]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index e7eed2a0bbb1..f8b226b84e98 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -4670,6 +4670,9 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, } +// Reading thread pointer from coprocessor register +def : T2Pat<(ARMthread_pointer), (t2MRC 15, 0, 13, 0, 3)>, + Requires<[IsThumb2, IsReadTPHard]>; //===----------------------------------------------------------------------===// // ARMv8.1 Privilege Access Never extension diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 7be5fc33a0af..04a835f08855 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -1027,12 +1027,13 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { } SDNode *Rdhwr = - CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0), + CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0), MVT::Glue, CurDAG->getRegister(Mips::HWR29, MVT::i32), CurDAG->getTargetConstant(0, DL, MVT::i32)); SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, - SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); + SDValue(Rdhwr, 0), SDValue(Rdhwr, 1)); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT, + Chain.getValue(1)); ReplaceNode(Node, ResNode.getNode()); return true; } diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 7631bb4bccfb..392de0f251a2 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1576,6 +1576,16 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, std::swap(Operands[2], Operands[1]); } + // Handle base mnemonic for atomic loads where the EH bit is zero. 
+  if (Name == "lqarx" || Name == "ldarx" || Name == "lwarx" ||
+      Name == "lharx" || Name == "lbarx") {
+    if (Operands.size() != 5)
+      return false;
+    PPCOperand &EHOp = (PPCOperand &)*Operands[4];
+    if (EHOp.isU1Imm() && EHOp.getImm() == 0)
+      Operands.pop_back();
+  }
+
   return false;
 }
 
@@ -1745,7 +1755,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
   }
 
   PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
-  if (Op.isImm() && Op.getImm() == ImmVal)
+  if (Op.isU3Imm() && Op.getImm() == ImmVal)
     return Match_Success;
 
   return Match_InvalidOperand;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 207101763ac2..7dab7a52ac53 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -999,7 +999,7 @@ bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                             unsigned &Flags) const {
   // More accurate safety checking is done in getOutliningCandidateInfo.
-  return true;
+  return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
 }
 
 // Enum values indicating how an outlined call should be constructed.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7df7cc93d6eb..53495489cef8 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -2173,7 +2173,7 @@ let hasSideEffects = 1 in {
   def EX   : SideEffectBinaryRX<"ex", 0x44, ADDR64>;
   def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, ADDR64>;
   let hasNoSchedulingInfo = 1 in
-    def EXRL_Pseudo : Pseudo<(outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
+    def EXRL_Pseudo : Alias<6, (outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
                                             bdaddr12only:$bdl1, bdaddr12only:$bd2),
                              []>;
 }
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 4add8d30e010..65ffe6621545 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -657,35 +657,24 @@ void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
                   EntryBlk->end());
   TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
 
-  int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm();
-  Register BaseReg;
-  uint64_t FrameOffset =
-      X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed();
-  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm();
+  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
+  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();
 
   // TODO: add support for YMM and ZMM here.
   unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
 
   // In the XMM save block, save all the XMM argument registers.
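+  // Note: the rewritten loop below copies the pseudo's full memory reference
+  // (base, scale, index and segment operands) unchanged and overrides only
+  // the displacement, instead of recomputing the address from a frame index.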
-  for (int64_t OpndIdx = 3, RegIdx = 0;
+  for (int64_t OpndIdx = 7, RegIdx = 0;
        OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
        OpndIdx++, RegIdx++) {
-
-    int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16;
-
-    MachineMemOperand *MMO = Func->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset),
-        MachineMemOperand::MOStore,
-        /*Size=*/16, Align(16));
-
-    BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc))
-        .addReg(BaseReg)
-        .addImm(/*Scale=*/1)
-        .addReg(/*IndexReg=*/0)
-        .addImm(/*Disp=*/Offset)
-        .addReg(/*Segment=*/0)
-        .addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg())
-        .addMemOperand(MMO);
+    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      if (i == X86::AddrDisp)
+        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
+      else
+        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
+    }
+    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
     assert(Register::isPhysicalRegister(
         VAStartPseudoInstr->getOperand(OpndIdx).getReg()));
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 032db2a80a77..4b13b5b540b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3533,13 +3533,19 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
       SmallVector<SDValue, 12> SaveXMMOps;
       SaveXMMOps.push_back(Chain);
       SaveXMMOps.push_back(ALVal);
-      SaveXMMOps.push_back(
-          DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
+      SaveXMMOps.push_back(RSFIN);
       SaveXMMOps.push_back(
           DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
       llvm::append_range(SaveXMMOps, LiveXMMRegs);
-      MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
-                                   MVT::Other, SaveXMMOps));
+      MachineMemOperand *StoreMMO =
+          DAG.getMachineFunction().getMachineMemOperand(
+              MachinePointerInfo::getFixedStack(
+                  DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
+                  Offset),
+              MachineMemOperand::MOStore, 128, Align(16));
+      MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
+                                               DL, DAG.getVTList(MVT::Other),
+                                               SaveXMMOps, MVT::i8, StoreMMO));
     }
 
     if (!MemOps.empty())
@@ -44070,32 +44076,9 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
          "Unexpected horizontal add/sub opcode");
 
   if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
-    // For slow-hop targets, if we have a hop with a single op, see if we already
-    // have another user that we can reuse and shuffle the result.
     MVT VT = N->getSimpleValueType(0);
     SDValue LHS = N->getOperand(0);
     SDValue RHS = N->getOperand(1);
-    if (VT.is128BitVector() && LHS == RHS) {
-      for (SDNode *User : LHS->uses()) {
-        if (User != N && User->getOpcode() == N->getOpcode()) {
-          MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
-          if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
-            return DAG.getBitcast(
-                VT,
-                DAG.getVectorShuffle(ShufVT, SDLoc(N),
-                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
-                                     DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
-          }
-          if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
-            return DAG.getBitcast(
-                VT,
-                DAG.getVectorShuffle(ShufVT, SDLoc(N),
-                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
-                                     DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
-          }
-        }
-      }
-    }
     // HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
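+    // e.g. (illustrative) with h = hadd(x,y): hadd(hadd(x,x), hadd(y,y)) can
+    // be rebuilt as hadd(shuffle(h,{0,1,0,1}), shuffle(h,{2,3,2,3})), since
+    // the low two lanes of h hold the hadd of x and the high two that of y.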
     if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 869857bcc0d6..8b18b5981e86 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -627,10 +627,6 @@ namespace llvm {
     // packed single precision.
     DPBF16PS,
 
-    // Save xmm argument registers to the stack, according to %al. An operator
-    // is needed so that this can be expanded with control flow.
-    VASTART_SAVE_XMM_REGS,
-
     // Windows's _chkstk call to do stack probing.
     WIN_ALLOCA,
 
@@ -848,6 +844,10 @@ namespace llvm {
     AESENCWIDE256KL,
     AESDECWIDE256KL,
 
+    // Save xmm argument registers to the stack, according to %al. An operator
+    // is needed so that this can be expanded with control flow.
+    VASTART_SAVE_XMM_REGS,
+
     // WARNING: Do not add anything in the end unless you want the node to
     // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
     // opcodes will be thought as target memory ops!
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 202d320cd731..aa14c8016a83 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -69,16 +69,12 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
 let SchedRW = [WriteSystem] in {
 
 // x86-64 va_start lowering magic.
-let hasSideEffects = 1, Defs = [EFLAGS] in {
+let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in {
 def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                               (outs),
-                              (ins GR8:$al,
-                                   i32imm:$regsavefi, i32imm:$offset,
-                                   variable_ops),
-                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
-                              [(X86vastart_save_xmm_regs GR8:$al,
-                                                         timm:$regsavefi,
-                                                         timm:$offset),
+                              (ins GR8:$al, i8mem:$regsavefi, variable_ops),
+                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi",
+                              [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi),
                                (implicit EFLAGS)]>;
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 34afedb5bad2..489ea7fb127a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -91,8 +91,7 @@ def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 
 def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
-                                                         SDTCisVT<1, iPTR>,
-                                                         SDTCisVT<2, iPTR>]>;
+                                                         SDTCisPtrTy<1>]>;
 
 def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
                                          SDTCisPtrTy<1>,
@@ -184,7 +183,7 @@ def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
 def X86vastart_save_xmm_regs :
                  SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
                         SDT_X86VASTART_SAVE_XMM_REGS,
-                        [SDNPHasChain, SDNPVariadic]>;
+                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
 def X86vaarg64 :
                  SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
                         [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 48c27051a872..355ddf26e3bb 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -583,7 +583,7 @@ def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
 //===----------------------------------------------------------------------===//
 // VIA PadLock crypto instructions
 let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
-  def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB, REP;
+  def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
 def : InstAlias<"xstorerng", (XSTORE)>;
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b6932dbbfc3f..fc83befe3950 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
@@ -1174,6 +1175,15 @@ scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
 static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
   DenseMap<Value *, Value *> ResolvedValues;
   BasicBlock *UnconditionalSucc = nullptr;
+  assert(InitialInst->getModule());
+  const DataLayout &DL = InitialInst->getModule()->getDataLayout();
+
+  auto TryResolveConstant = [&ResolvedValues](Value *V) {
+    auto It = ResolvedValues.find(V);
+    if (It != ResolvedValues.end())
+      V = It->second;
+    return dyn_cast<ConstantInt>(V);
+  };
 
   Instruction *I = InitialInst;
   while (I->isTerminator() ||
@@ -1190,47 +1200,65 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
     }
     if (auto *BR = dyn_cast<BranchInst>(I)) {
       if (BR->isUnconditional()) {
-        BasicBlock *BB = BR->getSuccessor(0);
+        BasicBlock *Succ = BR->getSuccessor(0);
         if (I == InitialInst)
-          UnconditionalSucc = BB;
-        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
-        I = BB->getFirstNonPHIOrDbgOrLifetime();
+          UnconditionalSucc = Succ;
+        scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
+        I = Succ->getFirstNonPHIOrDbgOrLifetime();
+        continue;
+      }
+
+      BasicBlock *BB = BR->getParent();
+      // Handle the case where the condition of the conditional branch is a
+      // constant, e.g.,
+      //
+      //     br i1 false, label %cleanup, label %CoroEnd
+      //
+      // which can arise during the transformation. We can keep simplifying
+      // in this case.
+      if (ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true)) {
+        // Handle this branch in the next iteration.
+        I = BB->getTerminator();
         continue;
       }
     } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
+      // If the number of cases in a suspended switch instruction is reduced
+      // to 1, it is simplified to a CmpInst by llvm::ConstantFoldTerminator.
      auto *BR = dyn_cast<BranchInst>(I->getNextNode());
-      if (BR && BR->isConditional() && CondCmp == BR->getCondition()) {
-        // If the case number of suspended switch instruction is reduced to
-        // 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
-        // And the comparsion looks like : %cond = icmp eq i8 %V, constant.
-        ConstantInt *CondConst = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
-        if (CondConst && CondCmp->getPredicate() == CmpInst::ICMP_EQ) {
-          Value *V = CondCmp->getOperand(0);
-          auto it = ResolvedValues.find(V);
-          if (it != ResolvedValues.end())
-            V = it->second;
-
-          if (ConstantInt *Cond0 = dyn_cast<ConstantInt>(V)) {
-            BasicBlock *BB = Cond0->equalsInt(CondConst->getZExtValue())
-                                 ? BR->getSuccessor(0)
-                                 : BR->getSuccessor(1);
-            scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
-            I = BB->getFirstNonPHIOrDbgOrLifetime();
-            continue;
-          }
-        }
-      }
+      if (!BR || !BR->isConditional() || CondCmp != BR->getCondition())
+        return false;
+
+      // The comparison looks like: %cond = icmp eq i8 %V, constant.
+      // So we try to resolve a constant for the first operand only, since the
+      // second operand should be a literal constant by design.
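+      // For example (illustrative), if ResolvedValues maps %V to i8 0, then
+      // "%cond = icmp eq i8 %V, 0" folds to true below and the branch target
+      // is known without emitting the comparison.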
+      ConstantInt *Cond0 = TryResolveConstant(CondCmp->getOperand(0));
+      auto *Cond1 = dyn_cast<ConstantInt>(CondCmp->getOperand(1));
+      if (!Cond0 || !Cond1)
+        return false;
+
+      // Both operands of the CmpInst are now constants, so we can evaluate
+      // it immediately to determine the destination.
+      auto *ConstResult =
+          dyn_cast_or_null<ConstantInt>(ConstantFoldCompareInstOperands(
+              CondCmp->getPredicate(), Cond0, Cond1, DL));
+      if (!ConstResult)
+        return false;
+
+      CondCmp->replaceAllUsesWith(ConstResult);
+      CondCmp->eraseFromParent();
+
+      // Handle this branch in the next iteration.
+      I = BR;
+      continue;
     } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
-      Value *V = SI->getCondition();
-      auto it = ResolvedValues.find(V);
-      if (it != ResolvedValues.end())
-        V = it->second;
-      if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
-        BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
-        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
-        I = BB->getFirstNonPHIOrDbgOrLifetime();
-        continue;
-      }
+      ConstantInt *Cond = TryResolveConstant(SI->getCondition());
+      if (!Cond)
+        return false;
+
+      BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
+      scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
+      I = BB->getFirstNonPHIOrDbgOrLifetime();
+      continue;
     }
     return false;
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4e3b18e805ee..71b3a411cc18 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2843,6 +2843,26 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
   }
   assert(FreeInstrBB->size() == 1 &&
          "Only the branch instruction should remain");
+
+  // Now that we've moved the call to free before the NULL check, we have to
+  // remove any attributes on its parameter that imply it's non-null, because
+  // those attributes might have only been valid because of the NULL check, and
+  // we can get miscompiles if we keep them. This is conservative if non-null is
+  // also implied by something other than the NULL check, but it's guaranteed to
+  // be correct, and the conservativeness won't matter in practice, since the
+  // attributes are irrelevant for the call to free itself and the pointer
+  // shouldn't be used after the call.
+  AttributeList Attrs = FI.getAttributes();
+  Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
+  Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
+  if (Dereferenceable.isValid()) {
+    uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
+    Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
+                                       Attribute::Dereferenceable);
+    Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
+  }
+  FI.setAttributes(Attrs);
+
   return &FI;
 }
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585..9d8130d1ac02 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1303,17 +1303,10 @@ struct DSEState {
   /// loop. In particular, this guarantees that it only references a single
   /// MemoryLocation during execution of the containing function.
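+  /// (The rewritten base check below is deliberately conservative: a pointer
+  /// defined by an instruction is treated as invariant only when that
+  /// instruction sits in the entry block, which executes exactly once.)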
   bool isGuaranteedLoopInvariant(const Value *Ptr) {
-    auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
+    auto IsGuaranteedLoopInvariantBase = [](const Value *Ptr) {
       Ptr = Ptr->stripPointerCasts();
-      if (auto *I = dyn_cast<Instruction>(Ptr)) {
-        if (isa<AllocaInst>(Ptr))
-          return true;
-
-        if (isAllocLikeFn(I, &TLI))
-          return true;
-
-        return false;
-      }
+      if (auto *I = dyn_cast<Instruction>(Ptr))
+        return I->getParent()->isEntryBlock();
       return true;
     };
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index f13f24ad2027..a04d4ef3c086 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -154,6 +154,10 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
     return {};
   }
   Value *const Addr = LoadI->getOperand(0);
+  if (Addr->getType()->getPointerAddressSpace() != 0) {
+    LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
+    return {};
+  }
   auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
   if (!GEP)
     return {};
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index 85e5adaeaf5e..3127432dc6c9 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -144,6 +144,10 @@ static void convertToRelLookupTable(GlobalVariable &LookupTable) {
   Value *Offset =
       Builder.CreateShl(Index, ConstantInt::get(IntTy, 2), "reltable.shift");
 
+  // Insert the call to the load.relative intrinsic before the LOAD: the GEP
+  // might not be immediately followed by the LOAD, since it can be hoisted
+  // out of a loop, or another instruction might have been inserted between
+  // them.
+  Builder.SetInsertPoint(Load);
   Function *LoadRelIntrinsic = llvm::Intrinsic::getDeclaration(
       &M, Intrinsic::load_relative, {Index->getType()});
   Value *Base = Builder.CreateBitCast(RelLookupTable, Builder.getInt8PtrTy());
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc3f5c7d4b48..1d06bc7d79a7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5430,8 +5430,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       // The pointer operand uses an in-tree scalar so we add the new BitCast
      // to ExternalUses list to make sure that an extract will be generated
       // in the future.
-      if (getTreeEntry(PO))
-        ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
+      if (TreeEntry *Entry = getTreeEntry(PO)) {
+        // Find which lane we need to extract.
+        unsigned FoundLane = Entry->findLaneForValue(PO);
+        ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
+      }
 
       NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
     } else {
@@ -5474,8 +5477,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       // The pointer operand uses an in-tree scalar, so add the new BitCast to
       // ExternalUses to make sure that an extract will be generated in the
       // future.
-      if (getTreeEntry(ScalarPtr))
-        ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
+      if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
+        // Find which lane we need to extract.
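+        // (Lane 0 was previously hardcoded here; when the scalar actually
+        // sits in a different lane, the extract would read the wrong element.)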
+        unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
+        ExternalUses.push_back(
+            ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
+      }
 
       Value *V = propagateMetadata(ST, E->Scalars);
 
@@ -5577,8 +5584,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         // The scalar argument uses an in-tree scalar so we add the new vectorized
         // call to ExternalUses list to make sure that an extract will be
         // generated in the future.
-        if (ScalarArg && getTreeEntry(ScalarArg))
-          ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
+        if (ScalarArg) {
+          if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
+            // Find which lane we need to extract.
+            unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
+            ExternalUses.push_back(
+                ExternalUser(ScalarArg, cast<User>(V), FoundLane));
+          }
+        }
 
       propagateIRFlags(V, E->Scalars, VL0);
       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
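As a reading aid for the CoroSplit hunks above, here is a minimal standalone sketch of the resolve-then-fold pattern they introduce: an operand is first looked up through a resolution map, and the comparison is folded only when both sides turn out to be known constants. This is plain C++, not the LLVM API, and every name in it (ResolvedMap, tryResolveConstant, the "%V" key) is invented for illustration.

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <unordered_map>

    // Stand-in for CoroSplit's ResolvedValues: names mapped to known constants.
    using ResolvedMap = std::unordered_map<std::string, int8_t>;

    // Mirrors the TryResolveConstant lambda: look the value up through the map
    // and report it only if it resolves to a constant we know.
    std::optional<int8_t> tryResolveConstant(const ResolvedMap &Resolved,
                                             const std::string &V) {
      auto It = Resolved.find(V);
      if (It != Resolved.end())
        return It->second;
      return std::nullopt; // unresolved: the caller must stop simplifying
    }

    int main() {
      // Hypothetical state after scanning PHI nodes: %V is known to be 0.
      ResolvedMap Resolved{{"%V", 0}};

      // "%cond = icmp eq i8 %V, 1": fold only when the first operand resolves;
      // the second operand is a literal constant by construction.
      const int8_t RHS = 1;
      if (auto LHS = tryResolveConstant(Resolved, "%V")) {
        bool Cond = (*LHS == RHS);
        std::cout << "folded: take successor " << (Cond ? 0 : 1) << "\n";
      } else {
        std::cout << "unresolved: leave the terminator alone\n";
      }
      return 0;
    }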