author      Dimitry Andric <dim@FreeBSD.org>    2021-09-09 09:01:17 +0000
committer   Dimitry Andric <dim@FreeBSD.org>    2021-09-09 09:01:17 +0000
commit      f9ef3ff6e2ee04c09e09e68d6ffcafc094485c1e (patch)
tree        84051c2e3cc48297e330441dd7215326d26599a0
parent      d545c2ce5ad1891282e8818b47ffe557c76a86b4 (diff)
Vendor import of llvm-project branch release/13.x llvmorg-13.0.0-rc2-43-gf56129fe78d5.
(vendor/llvm-project/llvmorg-13.0.0-rc2-43-gf56129fe78d5)
61 files changed, 762 insertions, 357 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index fdba204fbe7f..0e163f3161a3 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9653,11 +9653,19 @@ static QualType mergeEnumWithInteger(ASTContext &Context, const EnumType *ET, QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, bool OfBlockPointer, bool Unqualified, bool BlockReturnType) { + // For C++ we will not reach this code with reference types (see below), + // for OpenMP variant call overloading we might. + // // C++ [expr]: If an expression initially has the type "reference to T", the // type is adjusted to "T" prior to any further analysis, the expression // designates the object or function denoted by the reference, and the // expression is an lvalue unless the reference is an rvalue reference and // the expression is a function call (possibly inside parentheses). + if (LangOpts.OpenMP && LHS->getAs<ReferenceType>() && + RHS->getAs<ReferenceType>() && LHS->getTypeClass() == RHS->getTypeClass()) + return mergeTypes(LHS->getAs<ReferenceType>()->getPointeeType(), + RHS->getAs<ReferenceType>()->getPointeeType(), + OfBlockPointer, Unqualified, BlockReturnType); if (LHS->getAs<ReferenceType>() || RHS->getAs<ReferenceType>()) return {}; diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp index 31cb36d37636..c0cd8fa90ed6 100644 --- a/clang/lib/Basic/Targets/M68k.cpp +++ b/clang/lib/Basic/Targets/M68k.cpp @@ -37,8 +37,8 @@ M68kTargetInfo::M68kTargetInfo(const llvm::Triple &Triple, // FIXME how to wire it with the used object format? Layout += "-m:e"; - // M68k pointers are always 32 bit wide even for 16 bit cpus - Layout += "-p:32:32"; + // M68k pointers are always 32 bit wide even for 16-bit CPUs + Layout += "-p:32:16:32"; // M68k integer data types Layout += "-i8:8:8-i16:16:16-i32:16:32"; diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index e24fb5cf082d..3fe39ed64d9c 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -460,6 +460,11 @@ protected: Builder.defineMacro("_REENTRANT"); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); + + if (Opts.C11) { + Builder.defineMacro("__STDC_NO_ATOMICS__"); + Builder.defineMacro("__STDC_NO_THREADS__"); + } } public: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 5c323cb6ea23..94a7553e273b 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5568,7 +5568,6 @@ llvm::StringRef clang::driver::getDriverMode(StringRef ProgName, if (!Arg.startswith(OptName)) continue; Opt = Arg; - break; } if (Opt.empty()) Opt = ToolChain::getTargetAndModeFromProgramName(ProgName).DriverMode; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index d63c5e12c4af..4a7413112b55 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -893,3 +893,38 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const { return true; return false; } + +llvm::SmallVector<std::string, 12> +ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, + const std::string &GPUArch) const { + auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); + const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); + + std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); + if (LibDeviceFile.empty()) { + getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; + 
return {}; + } + + // If --hip-device-lib is not set, add the default bitcode libraries. + // TODO: There are way too many flags that change this. Do we need to check + // them all? + bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, + options::OPT_fno_gpu_flush_denormals_to_zero, + getDefaultDenormsAreZeroForTarget(Kind)); + bool FiniteOnly = DriverArgs.hasFlag( + options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); + bool UnsafeMathOpt = + DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, + options::OPT_fno_unsafe_math_optimizations, false); + bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, + options::OPT_fno_fast_math, false); + bool CorrectSqrt = DriverArgs.hasFlag( + options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, + options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); + bool Wave64 = isWave64(DriverArgs, Kind); + + return RocmInstallation.getCommonBitcodeLibs( + DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, + FastRelaxedMath, CorrectSqrt); +}
\ No newline at end of file diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 50ed3b3ded9a..a4bcf315ca76 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -136,6 +136,11 @@ public: addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind DeviceOffloadKind) const override; + + // Returns a list of device library names shared by different languages + llvm::SmallVector<std::string, 12> + getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, + const std::string &GPUArch) const; }; } // end namespace toolchains diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index fe1d19c2dd67..135e3694434d 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -9,12 +9,14 @@ #include "AMDGPUOpenMP.h" #include "AMDGPU.h" #include "CommonArgs.h" +#include "ToolChains/ROCm.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Options.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" @@ -84,14 +86,34 @@ static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, } // namespace const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( - Compilation &C, const JobAction &JA, const InputInfoList &Inputs, - const ArgList &Args, StringRef SubArchName, - StringRef OutputFilePrefix) const { + const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, + const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, + StringRef SubArchName, StringRef OutputFilePrefix) const { ArgStringList CmdArgs; for (const auto &II : Inputs) if (II.isFilename()) CmdArgs.push_back(II.getFilename()); + + if (Args.hasArg(options::OPT_l)) { + auto Lm = Args.getAllArgValues(options::OPT_l); + bool HasLibm = false; + for (auto &Lib : Lm) { + if (Lib == "m") { + HasLibm = true; + break; + } + } + + if (HasLibm) { + SmallVector<std::string, 12> BCLibs = + AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); + llvm::for_each(BCLibs, [&](StringRef BCFile) { + CmdArgs.push_back(Args.MakeArgString(BCFile)); + }); + } + } + // Add an intermediate output file. CmdArgs.push_back("-o"); const char *OutputFileName = @@ -180,8 +202,8 @@ void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, assert(Prefix.length() && "no linker inputs are files "); // Each command outputs different files. - const char *LLVMLinkCommand = - constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix); + const char *LLVMLinkCommand = constructLLVMLinkCommand( + AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix); // Produce readable assembly if save-temps is enabled. if (C.getDriver().isSaveTempsEnabled()) diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h index effca7e212cc..233256bf7378 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -16,6 +16,10 @@ namespace clang { namespace driver { +namespace toolchains { +class AMDGPUOpenMPToolChain; +} + namespace tools { namespace AMDGCN { @@ -35,11 +39,11 @@ public: private: /// \return llvm-link output file name. 
- const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix) const; + const char *constructLLVMLinkCommand( + const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, + const JobAction &JA, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, + llvm::StringRef OutputFilePrefix) const; /// \return llc output file name. const char *constructLlcCommand(Compilation &C, const JobAction &JA, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 4c8ba8cdcd29..58ae08a3879c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1255,7 +1255,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, // If we are offloading to a target via OpenMP we need to include the // openmp_wrappers folder which contains alternative system headers. if (JA.isDeviceOffloading(Action::OFK_OpenMP) && - getToolChain().getTriple().isNVPTX()){ + (getToolChain().getTriple().isNVPTX() || + getToolChain().getTriple().isAMDGCN())) { if (!Args.hasArg(options::OPT_nobuiltininc)) { // Add openmp_wrappers/* to our system include path. This lets us wrap // standard library headers. diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 83cab3ac00cb..0ffe95795381 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -775,7 +775,8 @@ void tools::linkSanitizerRuntimeDeps(const ToolChain &TC, CmdArgs.push_back("-ldl"); // Required for backtrace on some OSes if (TC.getTriple().isOSFreeBSD() || - TC.getTriple().isOSNetBSD()) + TC.getTriple().isOSNetBSD() || + TC.getTriple().isOSOpenBSD()) CmdArgs.push_back("-lexecinfo"); } diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 59d58aadb687..c4e840de86e1 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -395,35 +395,8 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { } StringRef GpuArch = getGPUArch(DriverArgs); assert(!GpuArch.empty() && "Must have an explicit GPU arch."); - (void)GpuArch; - auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); - const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); - - std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); - if (LibDeviceFile.empty()) { - getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch; - return {}; - } // If --hip-device-lib is not set, add the default bitcode libraries. - // TODO: There are way too many flags that change this. Do we need to check - // them all? 
- bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, - options::OPT_fno_gpu_flush_denormals_to_zero, - getDefaultDenormsAreZeroForTarget(Kind)); - bool FiniteOnly = - DriverArgs.hasFlag(options::OPT_ffinite_math_only, - options::OPT_fno_finite_math_only, false); - bool UnsafeMathOpt = - DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, false); - bool FastRelaxedMath = DriverArgs.hasFlag( - options::OPT_ffast_math, options::OPT_fno_fast_math, false); - bool CorrectSqrt = DriverArgs.hasFlag( - options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, - options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); - bool Wave64 = isWave64(DriverArgs, Kind); - if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, false)) { auto AsanRTL = RocmInstallation.getAsanRTLPath(); @@ -442,10 +415,8 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { // Add the HIP specific bitcode library. BCLibs.push_back(RocmInstallation.getHIPPath().str()); - // Add the generic set of libraries. - BCLibs.append(RocmInstallation.getCommonBitcodeLibs( - DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, - FastRelaxedMath, CorrectSqrt)); + // Add common device libraries like ocml etc. + BCLibs.append(getCommonDeviceLibNames(DriverArgs, GpuArch.str())); // Add instrument lib. auto InstLib = diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index e162165b2561..89828fbb6f5f 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -174,6 +174,11 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { + // Use the static OpenMP runtime with -static-openmp + bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && + !Args.hasArg(options::OPT_static); + addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP); + if (D.CCCIsCXX()) { if (ToolChain.ShouldLinkCXXStdlib(Args)) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); @@ -221,6 +226,8 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend))); } + ToolChain.addProfileRTLibs(Args, CmdArgs); + const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h index f801e5426aa4..cc4e1a4dd96a 100644 --- a/clang/lib/Headers/__clang_cuda_device_functions.h +++ b/clang/lib/Headers/__clang_cuda_device_functions.h @@ -34,10 +34,12 @@ __DEVICE__ unsigned long long __brevll(unsigned long long __a) { return __nv_brevll(__a); } #if defined(__cplusplus) -__DEVICE__ void __brkpt() { asm volatile("brkpt;"); } +__DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); } __DEVICE__ void __brkpt(int __a) { __brkpt(); } #else -__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); } +__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { + __asm__ __volatile__("brkpt;"); +} __DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); } #endif __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b, @@ -507,7 +509,7 @@ __DEVICE__ float __powf(float __a, float __b) { } // 
Parameter must have a known integer value. -#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a)) +#define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a)) __DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); } __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) { return __nv_sad(__a, __b, __c); @@ -526,7 +528,7 @@ __DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); } __DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); } __DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); }; __DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); }; -__DEVICE__ void __trap(void) { asm volatile("trap;"); } +__DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); } __DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) { return __nvvm_atom_add_gen_i((int *)__p, __v); } @@ -1051,122 +1053,136 @@ __DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) { } __DEVICE__ unsigned int __vabs2(unsigned int __a) { unsigned int r; - asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(0), "r"(0)); + __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabs4(unsigned int __a) { unsigned int r; - asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(0), "r"(0)); + __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vabsss2(unsigned int __a) { unsigned int r; - asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(0), "r"(0)); + __asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vabsss4(unsigned int __a) { unsigned int r; - asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(0), "r"(0)); + __asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(0), "r"(0)); return r; } __DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vadd4.u32.u32.u32 
%0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.u32.u32.eq %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { @@ -1174,7 +1190,9 @@ __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.u32.u32.eq %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { @@ -1182,7 +1200,9 @@ __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.s32.s32.ge %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int 
__b) { @@ -1190,7 +1210,9 @@ __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.s32.s32.ge %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { @@ -1198,7 +1220,9 @@ __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.u32.u32.ge %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { @@ -1206,7 +1230,9 @@ __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.u32.u32.ge %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { @@ -1214,7 +1240,9 @@ __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.s32.s32.gt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { @@ -1222,7 +1250,9 @@ __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.s32.s32.gt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { @@ -1230,7 +1260,9 @@ __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.u32.u32.gt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { @@ -1238,7 +1270,9 @@ __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.u32.u32.gt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { @@ -1246,7 +1280,9 @@ __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.s32.s32.le %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) { @@ -1254,7 +1290,9 @@ __DEVICE__ unsigned int 
__vcmples2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.s32.s32.le %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { @@ -1262,7 +1300,9 @@ __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.u32.u32.le %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { @@ -1270,7 +1310,9 @@ __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.u32.u32.le %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { @@ -1278,7 +1320,9 @@ __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.s32.s32.lt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { @@ -1286,7 +1330,9 @@ __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.s32.s32.lt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { @@ -1294,7 +1340,9 @@ __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.u32.u32.lt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { @@ -1302,7 +1350,9 @@ __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.u32.u32.lt %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { @@ -1310,7 +1360,9 @@ __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) { } __DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset2.u32.u32.ne %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { @@ -1318,7 +1370,9 @@ __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) { } __DEVICE__ 
unsigned int __vsetne4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vset4.u32.u32.ne %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) { @@ -1345,94 +1399,112 @@ __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) { unsigned mask = __vcmpgts2(__a, __b); r = (__a & mask) | (__b & ~mask); } else { - asm("vmax2.s32.s32.s32 %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); } return r; } __DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;" 
- : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); } __DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0)); + __asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); } __DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vnegss2(unsigned int __a) { @@ -1440,9 +1512,9 @@ __DEVICE__ unsigned int __vnegss2(unsigned int __a) { } __DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vnegss4(unsigned int __a) { @@ -1450,16 +1522,16 @@ __DEVICE__ unsigned int __vnegss4(unsigned int __a) { } __DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { unsigned int r; - asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;" - : "=r"(r) - : "r"(__a), "r"(__b), "r"(0)); + __asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;" + : "=r"(r) + : "r"(__a), "r"(__b), "r"(0)); return r; } #endif // CUDA_VERSION >= 9020 diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h index 7342705434e6..d488db0a94d9 100644 --- a/clang/lib/Headers/__clang_hip_cmath.h +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -10,7 +10,7 @@ #ifndef __CLANG_HIP_CMATH_H__ #define __CLANG_HIP_CMATH_H__ -#if !defined(__HIP__) +#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." #endif @@ -25,31 +25,43 @@ #endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") +#pragma push_macro("__CONSTEXPR__") +#ifdef __OPENMP_AMDGCN__ +#define __DEVICE__ static __attribute__((always_inline, nothrow)) +#define __CONSTEXPR__ constexpr +#else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) +#define __CONSTEXPR__ +#endif // __OPENMP_AMDGCN__ // Start with functions that cannot be defined by DEF macros below. 
#if defined(__cplusplus) -__DEVICE__ double abs(double __x) { return ::fabs(__x); } -__DEVICE__ float abs(float __x) { return ::fabsf(__x); } -__DEVICE__ long long abs(long long __n) { return ::llabs(__n); } -__DEVICE__ long abs(long __n) { return ::labs(__n); } -__DEVICE__ float fma(float __x, float __y, float __z) { +#if defined __OPENMP_AMDGCN__ +__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); } +__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); } +__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); } +#endif +__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); } +__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); } +__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); } +__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); } +__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) { return ::fmaf(__x, __y, __z); } #if !defined(__HIPCC_RTC__) // The value returned by fpclassify is platform dependent, therefore it is not // supported by hipRTC. -__DEVICE__ int fpclassify(float __x) { +__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } -__DEVICE__ int fpclassify(double __x) { +__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } #endif // !defined(__HIPCC_RTC__) -__DEVICE__ float frexp(float __arg, int *__exp) { +__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -71,93 +83,101 @@ __DEVICE__ float frexp(float __arg, int *__exp) { // of the variants inside the inner region and avoid the clash. 
#pragma omp begin declare variant match(implementation = {vendor(llvm)}) -__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } -__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } -__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } -__DEVICE__ int isfinite(double __x) { return ::__finite(__x); } -__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } -__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } +__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); } #pragma omp end declare variant #endif // defined(__OPENMP_AMDGCN__) -__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } -__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } -__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } -__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); } -__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } -__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } +__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); } #if defined(__OPENMP_AMDGCN__) #pragma omp end declare variant #endif // defined(__OPENMP_AMDGCN__) -__DEVICE__ bool isgreater(float __x, float __y) { +__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } -__DEVICE__ bool isgreater(double __x, double __y) { +__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) { return __builtin_isgreater(__x, __y); } -__DEVICE__ bool isgreaterequal(float __x, float __y) { +__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) { return __builtin_isgreaterequal(__x, __y); } -__DEVICE__ bool isgreaterequal(double __x, double __y) { +__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); } -__DEVICE__ bool isless(float __x, float __y) { +__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) { return __builtin_isless(__x, __y); } -__DEVICE__ bool isless(double __x, double __y) { +__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) { return __builtin_isless(__x, __y); } -__DEVICE__ bool islessequal(float __x, float __y) { +__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) { return __builtin_islessequal(__x, __y); } -__DEVICE__ bool islessequal(double __x, double __y) { +__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) { return __builtin_islessequal(__x, __y); } -__DEVICE__ bool islessgreater(float __x, float __y) { +__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) { return __builtin_islessgreater(__x, __y); } -__DEVICE__ bool islessgreater(double __x, double __y) { +__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } 
-__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } -__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } -__DEVICE__ bool isunordered(float __x, float __y) { +__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) { + return __builtin_isnormal(__x); +} +__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) { + return __builtin_isnormal(__x); +} +__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) { return __builtin_isunordered(__x, __y); } -__DEVICE__ bool isunordered(double __x, double __y) { +__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) { return __builtin_isunordered(__x, __y); } -__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } -__DEVICE__ float pow(float __base, int __iexp) { +__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) { + return ::modff(__x, __iptr); +} +__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) { return ::powif(__base, __iexp); } -__DEVICE__ double pow(double __base, int __iexp) { +__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) { return ::powi(__base, __iexp); } -__DEVICE__ float remquo(float __x, float __y, int *__quo) { +__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) { return ::remquof(__x, __y, __quo); } -__DEVICE__ float scalbln(float __x, long int __n) { +__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) { return ::scalblnf(__x, __n); } -__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } -__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); } +__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); } +__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); } // Notably missing above is nexttoward. We omit it because // ocml doesn't provide an implementation, and we don't want to be in the // business of implementing tricky libm functions in this header. // Other functions. -__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { +__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y, + _Float16 __z) { return __ocml_fma_f16(__x, __y, __z); } -__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) { +__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) { return __ocml_pown_f16(__base, __iexp); } +#ifndef __OPENMP_AMDGCN__ // BEGIN DEF_FUN and HIP_OVERLOAD // BEGIN DEF_FUN @@ -168,18 +188,19 @@ __DEVICE__ _Float16 pow(_Float16 __base, int __iexp) { // Define cmath functions with float argument and returns __retty. #define __DEF_FUN1(__retty, __func) \ - __DEVICE__ \ - __retty __func(float __x) { return __func##f(__x); } + __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); } // Define cmath functions with two float arguments and returns __retty. #define __DEF_FUN2(__retty, __func) \ - __DEVICE__ \ - __retty __func(float __x, float __y) { return __func##f(__x, __y); } + __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \ + return __func##f(__x, __y); \ + } // Define cmath functions with a float and an int argument and returns __retty. #define __DEF_FUN2_FI(__retty, __func) \ - __DEVICE__ \ - __retty __func(float __x, int __y) { return __func##f(__x, __y); } + __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \ + return __func##f(__x, __y); \ + } __DEF_FUN1(float, acos) __DEF_FUN1(float, acosh) @@ -426,7 +447,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {}; // floor(double). 
#define __HIP_OVERLOAD1(__retty, __fn) \ template <typename __T> \ - __DEVICE__ \ + __DEVICE__ __CONSTEXPR__ \ typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \ __fn(__T __x) { \ return ::__fn((double)__x); \ @@ -438,7 +459,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {}; #if __cplusplus >= 201103L #define __HIP_OVERLOAD2(__retty, __fn) \ template <typename __T1, typename __T2> \ - __DEVICE__ typename __hip_enable_if< \ + __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \ __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \ typename __hip::__promote<__T1, __T2>::type>::type \ __fn(__T1 __x, __T2 __y) { \ @@ -448,10 +469,11 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {}; #else #define __HIP_OVERLOAD2(__retty, __fn) \ template <typename __T1, typename __T2> \ - __DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \ - __hip::is_arithmetic<__T2>::value, \ - __retty>::type \ - __fn(__T1 __x, __T2 __y) { \ + __DEVICE__ __CONSTEXPR__ \ + typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \ + __hip::is_arithmetic<__T2>::value, \ + __retty>::type \ + __fn(__T1 __x, __T2 __y) { \ return __fn((double)__x, (double)__y); \ } #endif @@ -526,7 +548,7 @@ __HIP_OVERLOAD2(double, min) // Additional Overloads that don't quite match HIP_OVERLOAD. #if __cplusplus >= 201103L template <typename __T1, typename __T2, typename __T3> -__DEVICE__ typename __hip_enable_if< +__DEVICE__ __CONSTEXPR__ typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value && __hip::is_arithmetic<__T3>::value, typename __hip::__promote<__T1, __T2, __T3>::type>::type @@ -536,31 +558,32 @@ fma(__T1 __x, __T2 __y, __T3 __z) { } #else template <typename __T1, typename __T2, typename __T3> -__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && - __hip::is_arithmetic<__T2>::value && - __hip::is_arithmetic<__T3>::value, - double>::type -fma(__T1 __x, __T2 __y, __T3 __z) { +__DEVICE__ __CONSTEXPR__ + typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && + __hip::is_arithmetic<__T2>::value && + __hip::is_arithmetic<__T3>::value, + double>::type + fma(__T1 __x, __T2 __y, __T3 __z) { return ::fma((double)__x, (double)__y, (double)__z); } #endif template <typename __T> -__DEVICE__ +__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type frexp(__T __x, int *__exp) { return ::frexp((double)__x, __exp); } template <typename __T> -__DEVICE__ +__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type ldexp(__T __x, int __exp) { return ::ldexp((double)__x, __exp); } template <typename __T> -__DEVICE__ +__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type modf(__T __x, double *__exp) { return ::modf((double)__x, __exp); @@ -568,7 +591,7 @@ __DEVICE__ #if __cplusplus >= 201103L template <typename __T1, typename __T2> -__DEVICE__ +__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type @@ -578,23 +601,24 @@ __DEVICE__ } #else template <typename __T1, typename __T2> -__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && - __hip::is_arithmetic<__T2>::value, - double>::type -remquo(__T1 __x, __T2 __y, int *__quo) { +__DEVICE__ __CONSTEXPR__ + typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && + __hip::is_arithmetic<__T2>::value, 
+ double>::type + remquo(__T1 __x, __T2 __y, int *__quo) { return ::remquo((double)__x, (double)__y, __quo); } #endif template <typename __T> -__DEVICE__ +__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type scalbln(__T __x, long int __exp) { return ::scalbln((double)__x, __exp); } template <typename __T> -__DEVICE__ +__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type scalbn(__T __x, int __exp) { return ::scalbn((double)__x, __exp); @@ -607,8 +631,10 @@ __DEVICE__ // END DEF_FUN and HIP_OVERLOAD +#endif // ifndef __OPENMP_AMDGCN__ #endif // defined(__cplusplus) +#ifndef __OPENMP_AMDGCN__ // Define these overloads inside the namespace our standard library uses. #if !defined(__HIPCC_RTC__) #ifdef _LIBCPP_BEGIN_NAMESPACE_STD @@ -781,22 +807,26 @@ _GLIBCXX_END_NAMESPACE_VERSION #if defined(__cplusplus) extern "C" { #endif // defined(__cplusplus) -__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) { +__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x, + double y) { return cosh(x) * y; } -__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) { +__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x, + float y) { return coshf(x) * y; } -__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) { +__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) { return fpclassify(*p); } -__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) { +__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) { return fpclassify(*p); } -__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) { +__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x, + double y) { return sinh(x) * y; } -__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) { +__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x, + float y) { return sinhf(x) * y; } #if defined(__cplusplus) @@ -804,7 +834,9 @@ __DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) { #endif // defined(__cplusplus) #endif // defined(_MSC_VER) #endif // !defined(__HIPCC_RTC__) +#endif // ifndef __OPENMP_AMDGCN__ #pragma pop_macro("__DEVICE__") +#pragma pop_macro("__CONSTEXPR__") #endif // __CLANG_HIP_CMATH_H__ diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index 1f0982d92eff..ef7e087b832c 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -9,7 +9,7 @@ #ifndef __CLANG_HIP_MATH_H__ #define __CLANG_HIP_MATH_H__ -#if !defined(__HIP__) +#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) #error "This file is for HIP and OpenMP AMDGCN device compilation only." #endif @@ -19,18 +19,30 @@ #endif #include <limits.h> #include <stdint.h> -#endif // __HIPCC_RTC__ +#ifdef __OPENMP_AMDGCN__ +#include <omp.h> +#endif +#endif // !defined(__HIPCC_RTC__) #pragma push_macro("__DEVICE__") + +#ifdef __OPENMP_AMDGCN__ +#define __DEVICE__ static inline __attribute__((always_inline, nothrow)) +#else #define __DEVICE__ static __device__ inline __attribute__((always_inline)) +#endif // A few functions return bool type starting only in C++11. 
#pragma push_macro("__RETURN_TYPE") +#ifdef __OPENMP_AMDGCN__ +#define __RETURN_TYPE int +#else #if defined(__cplusplus) #define __RETURN_TYPE bool #else #define __RETURN_TYPE int #endif +#endif // __OPENMP_AMDGCN__ #if defined (__cplusplus) && __cplusplus < 201103L // emulate static_assert on type sizes @@ -249,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } __DEVICE__ float frexpf(float __x, int *__nptr) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif float __r = __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); *__nptr = __tmp; @@ -334,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); } __DEVICE__ float modff(float __x, float *__iptr) { float __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif float __r = __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__iptr = __tmp; @@ -414,6 +432,9 @@ float remainderf(float __x, float __y) { __DEVICE__ float remquof(float __x, float __y, int *__quo) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif float __r = __ocml_remquo_f32( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; @@ -470,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } __DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr) { float __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; @@ -478,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) { __DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr) { float __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincospi_f32( __x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; @@ -790,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } __DEVICE__ double frexp(double __x, int *__nptr) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif double __r = __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); *__nptr = __tmp; @@ -874,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); } __DEVICE__ double modf(double __x, double *__iptr) { double __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif double __r = __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); *__iptr = __tmp; @@ -962,6 +995,9 @@ double remainder(double __x, double __y) { __DEVICE__ double remquo(double __x, double __y, int *__quo) { int __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif double __r = __ocml_remquo_f64( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); *__quo = __tmp; @@ -1020,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); } __DEVICE__ void sincos(double __x, double *__sinptr, double *__cosptr) { double __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincos_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; @@ -1028,6 +1067,9 @@ void sincos(double __x, double *__sinptr, 
double *__cosptr) { __DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr) { double __tmp; +#ifdef __OPENMP_AMDGCN__ +#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) +#endif *__sinptr = __ocml_sincospi_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; @@ -1262,7 +1304,7 @@ float min(float __x, float __y) { return fminf(__x, __y); } __DEVICE__ double min(double __x, double __y) { return fmin(__x, __y); } -#if !defined(__HIPCC_RTC__) +#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) __host__ inline static int min(int __arg1, int __arg2) { return std::min(__arg1, __arg2); } @@ -1270,7 +1312,7 @@ __host__ inline static int min(int __arg1, int __arg2) { __host__ inline static int max(int __arg1, int __arg2) { return std::max(__arg1, __arg2); } -#endif // __HIPCC_RTC__ +#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) #endif #pragma pop_macro("__DEVICE__") diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h index 953857badfc4..279fb26fbaf7 100644 --- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h @@ -14,13 +14,13 @@ #error "This file is for OpenMP compilation only." #endif -#pragma omp begin declare variant match( \ - device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) - #ifdef __cplusplus extern "C" { #endif +#pragma omp begin declare variant match( \ + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) + #define __CUDA__ #define __OPENMP_NVPTX__ @@ -33,11 +33,33 @@ extern "C" { #undef __OPENMP_NVPTX__ #undef __CUDA__ -#ifdef __cplusplus -} // extern "C" +#pragma omp end declare variant + +#ifdef __AMDGCN__ +#pragma omp begin declare variant match(device = {arch(amdgcn)}) + +// Import types which will be used by __clang_hip_libdevice_declares.h +#ifndef __cplusplus +#include <stdbool.h> +#include <stdint.h> #endif +#define __OPENMP_AMDGCN__ +#pragma push_macro("__device__") +#define __device__ + +/// Include declarations for libdevice functions. +#include <__clang_hip_libdevice_declares.h> + +#pragma pop_macro("__device__") +#undef __OPENMP_AMDGCN__ + #pragma omp end declare variant +#endif + +#ifdef __cplusplus +} // extern "C" +#endif // Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the // need to `include <new>` in C++ mode. 
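The snippet below is not part of the imported diff; it is a minimal sketch, assuming a clang built with AMDGPU OpenMP offload support and a ROCm device-library installation, of the kind of target region the new amdgcn wrapper headers above are meant to serve: libm calls inside an OpenMP target region resolve through openmp_wrappers/ to the variants declared for arch(amdgcn). The file name, GPU arch, and compile flags are illustrative only.

/* offload_math.c -- illustrative example, not from this commit.
 * Possible compile line (flags assumed; adjust to the installed toolchain):
 *   clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
 *         -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 offload_math.c -lm
 */
#include <math.h>
#include <stdio.h>

int main(void) {
  double out[4];
#pragma omp target map(from : out[0:4])
  {
    /* On the device these calls go through the amdgcn 'declare variant'
     * declarations pulled in by the openmp_wrappers math headers. */
    for (int i = 0; i < 4; ++i)
      out[i] = sin(0.5 * i) + fabs(-1.0 * i);
  }
  for (int i = 0; i < 4; ++i)
    printf("out[%d] = %f\n", i, out[i]);
  return 0;
}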
diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath index 1aff66af7d52..22a720aca956 100644 --- a/clang/lib/Headers/openmp_wrappers/cmath +++ b/clang/lib/Headers/openmp_wrappers/cmath @@ -75,4 +75,58 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); } #pragma omp end declare variant +#ifdef __AMDGCN__ +#pragma omp begin declare variant match(device = {arch(amdgcn)}) + +#pragma push_macro("__constant__") +#define __constant__ __attribute__((constant)) +#define __OPENMP_AMDGCN__ + +#include <__clang_hip_cmath.h> + +#pragma pop_macro("__constant__") +#undef __OPENMP_AMDGCN__ + +// Define overloads otherwise which are absent +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) + +__DEVICE__ float acos(float __x) { return ::acosf(__x); } +__DEVICE__ float acosh(float __x) { return ::acoshf(__x); } +__DEVICE__ float asin(float __x) { return ::asinf(__x); } +__DEVICE__ float asinh(float __x) { return ::asinhf(__x); } +__DEVICE__ float atan(float __x) { return ::atanf(__x); } +__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } +__DEVICE__ float atanh(float __x) { return ::atanhf(__x); } +__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); } +__DEVICE__ float cosh(float __x) { return ::coshf(__x); } +__DEVICE__ float erf(float __x) { return ::erff(__x); } +__DEVICE__ float erfc(float __x) { return ::erfcf(__x); } +__DEVICE__ float exp2(float __x) { return ::exp2f(__x); } +__DEVICE__ float expm1(float __x) { return ::expm1f(__x); } +__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); } +__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); } +__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); } +__DEVICE__ float ldexp(float __arg, int __exp) { + return ::ldexpf(__arg, __exp); +} +__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); } +__DEVICE__ float log1p(float __x) { return ::log1pf(__x); } +__DEVICE__ float logb(float __x) { return ::logbf(__x); } +__DEVICE__ float nextafter(float __x, float __y) { + return ::nextafterf(__x, __y); +} +__DEVICE__ float remainder(float __x, float __y) { + return ::remainderf(__x, __y); +} +__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); } +__DEVICE__ float sinh(float __x) { return ::sinhf(__x); } +__DEVICE__ float tan(float __x) { return ::tanf(__x); } +__DEVICE__ float tanh(float __x) { return ::tanhf(__x); } +__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); } + +#undef __DEVICE__ + +#pragma omp end declare variant +#endif // __AMDGCN__ + #endif diff --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h index c64af8b13ece..1e3c07cfdb8c 100644 --- a/clang/lib/Headers/openmp_wrappers/math.h +++ b/clang/lib/Headers/openmp_wrappers/math.h @@ -48,4 +48,14 @@ #pragma omp end declare variant +#ifdef __AMDGCN__ +#pragma omp begin declare variant match(device = {arch(amdgcn)}) + +#define __OPENMP_AMDGCN__ +#include <__clang_hip_math.h> +#undef __OPENMP_AMDGCN__ + +#pragma omp end declare variant +#endif + #endif diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index be4c51930789..25f134868758 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1087,7 +1087,7 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D, SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner, StartingScope, 
InstantiatingVarTemplate); - if (D->isNRVOVariable()) { + if (D->isNRVOVariable() && !Var->isInvalidDecl()) { QualType RT; if (auto *F = dyn_cast<FunctionDecl>(DC)) RT = F->getReturnType(); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 70ba631dbfc6..d8a5b6ad4f94 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -6578,7 +6578,7 @@ QualType TreeTransform<Derived>::TransformAutoType(TypeLocBuilder &TLB, NewTL.setFoundDecl(TL.getFoundDecl()); NewTL.setLAngleLoc(TL.getLAngleLoc()); NewTL.setRAngleLoc(TL.getRAngleLoc()); - for (unsigned I = 0; I < TL.getNumArgs(); ++I) + for (unsigned I = 0; I < NewTL.getNumArgs(); ++I) NewTL.setArgLocInfo(I, NewTemplateArgs.arguments()[I].getLocInfo()); return Result; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 83bade9941b3..1722572f1a27 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -8456,6 +8456,8 @@ void ASTReader::ReadLateParsedTemplates( LPTMap.insert(std::make_pair(FD, std::move(LT))); } } + + LateParsedTemplates.clear(); } void ASTReader::LoadSelector(Selector Sel) { diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 518447e3e422..2e91f16a2158 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -592,11 +592,17 @@ intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR = 0; /* This variable is a weak external reference which could be used to detect * whether or not the compiler defined this symbol. */ -#if defined(_WIN32) +#if defined(_MSC_VER) COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR; -#pragma comment(linker, "/alternatename:" \ - INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" \ - INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR)) +#if defined(_M_IX86) || defined(__i386__) +#define WIN_SYM_PREFIX "_" +#else +#define WIN_SYM_PREFIX +#endif +#pragma comment( \ + linker, "/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \ + INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \ + INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR)) #else COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __attribute__((weak, alias(INSTR_PROF_QUOTE( @@ -651,8 +657,9 @@ static void initializeProfileForContinuousMode(void) { const uint64_t *CountersBegin = __llvm_profile_begin_counters(); const uint64_t *CountersEnd = __llvm_profile_end_counters(); uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); - const uint64_t CountersOffset = - sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data)); + const uint64_t CountersOffset = sizeof(__llvm_profile_header) + + __llvm_write_binary_ids(NULL) + + (DataSize * sizeof(__llvm_profile_data)); int Length = getCurFilenameLength(); char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1); diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c index 0146b14c193f..1be0ef36a288 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c @@ -119,8 +119,9 @@ void __llvm_profile_initialize(void) { const uint64_t *CountersBegin = __llvm_profile_begin_counters(); const uint64_t *CountersEnd = __llvm_profile_end_counters(); const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, 
DataEnd); - const uint64_t CountersOffset = - sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data)); + const uint64_t CountersOffset = sizeof(__llvm_profile_header) + + __llvm_write_binary_ids(NULL) + + (DataSize * sizeof(__llvm_profile_data)); uint64_t CountersSize = CountersEnd - CountersBegin; /* Don't publish a VMO if there are no counters. */ diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index 7c15f97aff89..5d47083b8bfe 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -94,8 +94,8 @@ static size_t RoundUp(size_t size, size_t align) { * Write binary id length and then its data, because binary id does not * have a fixed length. */ -int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen, - const uint8_t *BinaryIdData) { +static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen, + const uint8_t *BinaryIdData) { ProfDataIOVec BinaryIdIOVec[] = { {&BinaryIdLen, sizeof(uint64_t), 1, 0}, {BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0}}; @@ -119,7 +119,8 @@ int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen, * Note sections like .note.ABI-tag and .note.gnu.build-id are aligned * to 4 bytes, so round n_namesz and n_descsz to the nearest 4 bytes. */ -int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) { +static int WriteBinaryIdForNote(ProfDataWriter *Writer, + const ElfW(Nhdr) * Note) { int BinaryIdSize = 0; const char *NoteName = (const char *)Note + sizeof(ElfW(Nhdr)); @@ -144,8 +145,8 @@ int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) { * If writer is given, write binary ids into profiles. * If an error happens while writing, return -1. 
*/ -int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note, - const ElfW(Nhdr) * NotesEnd) { +static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note, + const ElfW(Nhdr) * NotesEnd) { int TotalBinaryIdsSize = 0; while (Note < NotesEnd) { int Result = WriteBinaryIdForNote(Writer, Note); diff --git a/libcxx/include/cwctype b/libcxx/include/cwctype index 17c68d6d4544..27eea2f15730 100644 --- a/libcxx/include/cwctype +++ b/libcxx/include/cwctype @@ -59,6 +59,7 @@ wctrans_t wctrans(const char* property); _LIBCPP_BEGIN_NAMESPACE_STD +#if defined(_LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H) using ::wint_t _LIBCPP_USING_IF_EXISTS; using ::wctrans_t _LIBCPP_USING_IF_EXISTS; using ::wctype_t _LIBCPP_USING_IF_EXISTS; @@ -80,6 +81,7 @@ using ::towlower _LIBCPP_USING_IF_EXISTS; using ::towupper _LIBCPP_USING_IF_EXISTS; using ::towctrans _LIBCPP_USING_IF_EXISTS; using ::wctrans _LIBCPP_USING_IF_EXISTS; +#endif // _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/string b/libcxx/include/string index 4940021b0c68..4159ea580345 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -522,6 +522,7 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1 #include <algorithm> #include <compare> #include <cstdio> // EOF +#include <cstdlib> #include <cstring> #include <cwchar> #include <initializer_list> @@ -1714,6 +1715,24 @@ private: return data() <= __p && __p <= data() + size(); } + _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI + void __throw_length_error() const { +#ifndef _LIBCPP_NO_EXCEPTIONS + __basic_string_common<true>::__throw_length_error(); +#else + _VSTD::abort(); +#endif + } + + _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI + void __throw_out_of_range() const { +#ifndef _LIBCPP_NO_EXCEPTIONS + __basic_string_common<true>::__throw_out_of_range(); +#else + _VSTD::abort(); +#endif + } + friend basic_string operator+<>(const basic_string&, const basic_string&); friend basic_string operator+<>(const value_type*, const basic_string&); friend basic_string operator+<>(value_type, const basic_string&); diff --git a/libcxx/include/vector b/libcxx/include/vector index 9189ed44a80c..90d8b946f135 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -281,6 +281,7 @@ erase_if(vector<T, Allocator>& c, Predicate pred); // C++20 #include <algorithm> #include <climits> #include <compare> +#include <cstdlib> #include <cstring> #include <initializer_list> #include <iosfwd> // for forward declaration of vector @@ -390,6 +391,25 @@ protected: is_nothrow_move_assignable<allocator_type>::value) {__move_assign_alloc(__c, integral_constant<bool, __alloc_traits::propagate_on_container_move_assignment::value>());} + + _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI + void __throw_length_error() const { +#ifndef _LIBCPP_NO_EXCEPTIONS + __vector_base_common<true>::__throw_length_error(); +#else + _VSTD::abort(); +#endif + } + + _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI + void __throw_out_of_range() const { +#ifndef _LIBCPP_NO_EXCEPTIONS + __vector_base_common<true>::__throw_out_of_range(); +#else + _VSTD::abort(); +#endif + } + private: _LIBCPP_INLINE_VISIBILITY void __copy_assign_alloc(const __vector_base& __c, true_type) diff --git a/libcxx/include/wctype.h b/libcxx/include/wctype.h index 1b4b1461496c..3b614759ac6d 100644 --- a/libcxx/include/wctype.h +++ b/libcxx/include/wctype.h @@ -50,8 +50,18 @@ wctrans_t wctrans(const char* property); #pragma GCC system_header #endif +// TODO: +// In the future, we should unconditionally include_next 
<wctype.h> here and instead +// have a mode under which the library does not need libc++'s <wctype.h> or <cwctype> +// at all (i.e. a mode without wchar_t). As it stands, we need to do that to completely +// bypass the using declarations in <cwctype> when we did not include <wctype.h>. +// Otherwise, a using declaration like `using ::wint_t` in <cwctype> will refer to +// nothing (with using_if_exists), and if we include another header that defines one +// of these declarations (e.g. <wchar.h>), the second `using ::wint_t` with using_if_exists +// will fail because it does not refer to the same declaration. #if __has_include_next(<wctype.h>) # include_next <wctype.h> +# define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H #endif #ifdef __cplusplus diff --git a/libunwind/src/Unwind-EHABI.cpp b/libunwind/src/Unwind-EHABI.cpp index 32b5cbc3be92..8843db7f54c3 100644 --- a/libunwind/src/Unwind-EHABI.cpp +++ b/libunwind/src/Unwind-EHABI.cpp @@ -97,9 +97,11 @@ _Unwind_Reason_Code ProcessDescriptors( case Descriptor::LU32: descriptor = getNextWord(descriptor, &length); descriptor = getNextWord(descriptor, &offset); + break; case Descriptor::LU16: descriptor = getNextNibble(descriptor, &length); descriptor = getNextNibble(descriptor, &offset); + break; default: assert(false); return _URC_FAILURE; diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index ca276d2f3cf8..81500905c0f5 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -419,7 +419,7 @@ public: /// outer structure. SCCs do not support mutation of the call graph, that /// must be done through the containing \c RefSCC in order to fully reason /// about the ordering and connections of the graph. - class SCC { + class LLVM_EXTERNAL_VISIBILITY SCC { friend class LazyCallGraph; friend class LazyCallGraph::Node; diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 164ec50e47bc..5983f98d84cf 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -527,7 +527,7 @@ extern template class LoopBase<BasicBlock, Loop>; /// Represents a single loop in the control flow graph. Note that not all SCCs /// in the CFG are necessarily loops. -class Loop : public LoopBase<BasicBlock, Loop> { +class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> { public: /// A range representing the start and end location of a loop. class LocRange { diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 9a749a1c8eae..df10e126c31a 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -24,7 +24,7 @@ using LoopVectorTy = SmallVector<Loop *, 8>; class LPMUpdater; /// This class represents a loop nest and can be used to query its properties. -class LoopNest { +class LLVM_EXTERNAL_VISIBILITY LoopNest { public: /// Construct a loop nest rooted by loop \p Root. 
LoopNest(Loop &Root, ScalarEvolution &SE); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 628058142e48..5ab58ca0646a 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -97,7 +97,7 @@ struct HardwareLoopInfo { Loop *L = nullptr; BasicBlock *ExitBlock = nullptr; BranchInst *ExitBranch = nullptr; - const SCEV *TripCount = nullptr; + const SCEV *ExitCount = nullptr; IntegerType *CountType = nullptr; Value *LoopDecrement = nullptr; // Decrement the loop counter by this // value in every iteration. diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 786fe908f68f..c63a5d42e9b3 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -227,7 +227,7 @@ struct LandingPadInfo { : LandingPadBlock(MBB) {} }; -class MachineFunction { +class LLVM_EXTERNAL_VISIBILITY MachineFunction { Function &F; const LLVMTargetMachine &Target; const TargetSubtargetInfo *STI; diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index e0094e2afff2..c33e8e94b467 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -58,7 +58,8 @@ class User; class BranchProbabilityInfo; class BlockFrequencyInfo; -class Function : public GlobalObject, public ilist_node<Function> { +class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, + public ilist_node<Function> { public: using BasicBlockListType = SymbolTableList<BasicBlock>; diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 97aea5aedf22..bd3a196c7181 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -64,9 +64,9 @@ class VersionTuple; /// constant references to global variables in the module. When a global /// variable is destroyed, it should have no entries in the GlobalValueRefMap. /// The main container class for the LLVM Intermediate Representation. -class Module { -/// @name Types And Enumerations -/// @{ +class LLVM_EXTERNAL_VISIBILITY Module { + /// @name Types And Enumerations + /// @{ public: /// The type for the list of global variables. using GlobalListType = SymbolTableList<GlobalVariable>; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index f22d834b5e57..2d980e6935b3 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -13969,7 +13969,7 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet()) return false; auto I = RewriteMap.find(LHSUnknown->getValue()); - const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS; + const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown; RewriteMap[LHSUnknown->getValue()] = getUMaxExpr( getConstant(ExactRegion.getUnsignedMin()), getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax()))); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 304d24fe8e4a..9053acce60c4 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -167,11 +167,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // Note that this block may not be the loop latch block, even if the loop // has a latch block. 
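Several of the header hunks above (LazyCallGraph::SCC, Loop, LoopNest, MachineFunction, Function, Module) only add the LLVM_EXTERNAL_VISIBILITY annotation. On ELF-style toolchains that macro typically expands to an explicit default-visibility attribute, so these classes remain visible from a shared libLLVM even when the library is otherwise built with hidden visibility. A rough illustration of the underlying attribute; the EXPORTED macro below is hypothetical, not LLVM's:

// Build with -fvisibility=hidden: only entities explicitly given default
// visibility stay visible across the shared-library boundary.
#if defined(__GNUC__) || defined(__clang__)
#define EXPORTED __attribute__((visibility("default")))
#else
#define EXPORTED
#endif

class EXPORTED PublicApi {   // exported despite -fvisibility=hidden
public:
  int answer() const { return 42; }
};

class InternalDetail {       // hidden: not part of the DSO interface
public:
  int helper() const { return 7; }
};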
ExitBlock = BB; - TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType())); - - if (!EC->getType()->isPointerTy() && EC->getType() != CountType) - TripCount = SE.getZeroExtendExpr(TripCount, CountType); - + ExitCount = EC; break; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index faa14dca1c3f..7edc44c48bbd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1162,7 +1162,7 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { } DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, - DIE *CalleeDIE, + const DISubprogram *CalleeSP, bool IsTail, const MCSymbol *PCAddr, const MCSymbol *CallAddr, @@ -1176,7 +1176,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); } else { - assert(CalleeDIE && "No DIE for call site entry origin"); + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), *CalleeDIE); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 6d8186a5ee2b..6e9261087686 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -249,16 +249,14 @@ public: dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; /// Construct a call site entry DIE describing a call within \p Scope to a - /// callee described by \p CalleeDIE. - /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee. - /// For indirect calls \p CalleeDIE is set to nullptr. + /// callee described by \p CalleeSP. /// \p IsTail specifies whether the call is a tail call. /// \p PCAddr points to the PC value after the call instruction. /// \p CallAddr points to the PC value at the call instruction (or is null). /// \p CallReg is a register location for an indirect call. For direct calls /// the \p CallReg is set to 0. - DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail, - const MCSymbol *PCAddr, + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, + bool IsTail, const MCSymbol *PCAddr, const MCSymbol *CallAddr, unsigned CallReg); /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params /// were collected by the \ref collectCallSiteParameters. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index ee14423ca3d0..52591a18791f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -587,14 +587,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } -DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) { - DICompileUnit *Unit = SP->getUnit(); - assert(SP->isDefinition() && "Subprogram not a definition"); - assert(Unit && "Subprogram definition without parent unit"); - auto &CU = getOrCreateDwarfCompileUnit(Unit); - return *CU.getOrCreateSubprogramDIE(SP); -} - /// Represents a parameter whose call site value can be described by applying a /// debug expression to a register in the forwarded register worklist. 
struct FwdRegParamInfo { @@ -945,7 +937,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, continue; unsigned CallReg = 0; - DIE *CalleeDIE = nullptr; + const DISubprogram *CalleeSP = nullptr; const Function *CalleeDecl = nullptr; if (CalleeOp.isReg()) { CallReg = CalleeOp.getReg(); @@ -955,19 +947,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); if (!CalleeDecl || !CalleeDecl->getSubprogram()) continue; - const DISubprogram *CalleeSP = CalleeDecl->getSubprogram(); - - if (CalleeSP->isDefinition()) { - // Ensure that a subprogram DIE for the callee is available in the - // appropriate CU. - CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP); - } else { - // Create the declaration DIE if it is missing. This is required to - // support compilation of old bitcode with an incomplete list of - // retained metadata. - CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP); - } - assert(CalleeDIE && "Must have a DIE for the callee"); + CalleeSP = CalleeDecl->getSubprogram(); } // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). @@ -1004,7 +984,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, << (IsTail ? " [IsTail]" : "") << "\n"); DIE &CallSiteDIE = CU.constructCallSiteEntryDIE( - ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg); + ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg); // Optionally emit call-site-param debug info. if (emitDebugEntryValues()) { @@ -1121,6 +1101,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); } + // Create DIEs for function declarations used for call site debug info. + for (auto Scope : DIUnit->getRetainedTypes()) + if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope)) + NewCU.getOrCreateSubprogramDIE(SP); + CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); return NewCU; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 6356a65b50d3..b55be799b6bc 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -471,9 +471,6 @@ private: /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); - /// Construct a DIE for the subprogram definition \p SP and return it. - DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP); - /// Construct DIEs for call site entries describing the calls in \p MF. void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 344d30fad347..9d7b3d6e1891 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -186,9 +186,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { - // When the MDNode can be part of the type system (this includes subprogram - // declarations *and* subprogram definitions, even local definitions), the - // DIE must be shared across CUs. + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. 
// Combining type units and cross-CU DIE sharing is lower value (since // cross-CU DIE sharing is used in LTO and removes type redundancy at that // level already) but may be implementable for some value in projects @@ -196,7 +195,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // together. if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return false; - return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits(); + return (isa<DIType>(D) || + (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && + !DD->generateTypeUnits(); } DIE *DwarfUnit::getDIE(const DINode *D) const { diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 4316034371a5..248ef6c23974 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -187,7 +187,7 @@ namespace { const DataLayout &DL, OptimizationRemarkEmitter *ORE) : SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), - TripCount(Info.TripCount), + ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), LoopDecrement(Info.LoopDecrement), @@ -202,7 +202,7 @@ namespace { OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; - const SCEV *TripCount = nullptr; + const SCEV *ExitCount = nullptr; Type *CountType = nullptr; BranchInst *ExitBranch = nullptr; Value *LoopDecrement = nullptr; @@ -296,7 +296,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { } assert( - (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) && + (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && "Hardware Loop must have set exit info."); BasicBlock *Preheader = L->getLoopPreheader(); @@ -381,13 +381,18 @@ Value *HardwareLoop::InitLoopCount() { // loop counter and tests that is not zero? SCEVExpander SCEVE(SE, DL, "loopcnt"); + if (!ExitCount->getType()->isPointerTy() && + ExitCount->getType() != CountType) + ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); + + ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); // If we're trying to use the 'test and set' form of the intrinsic, we need // to replace a conditional branch that is controlling entry to the loop. It // is likely (guaranteed?) that the preheader has an unconditional branch to // the loop header, so also check if it has a single predecessor. 
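In the HardwareLoopInfo / HardwareLoops.cpp hunks in this area, the analysis now records the loop's exit count (the backedge-taken count) instead of a precomputed trip count, and HardwareLoop::InitLoopCount derives the trip count itself: the exit count is zero-extended to the counter type first and then incremented. A small arithmetic sketch of that relationship, using a hypothetical helper rather than the SCEV-based code:

#include <cstdint>

// exitCount is the backedge-taken count; the hardware-loop counter needs the
// iteration count, i.e. exitCount + 1. Widening before the add avoids
// wrapping when exitCount is already the maximum of its original type.
uint64_t initLoopCount(uint32_t exitCount) {
  uint64_t widened = static_cast<uint64_t>(exitCount); // zero-extend
  return widened + 1;                                  // trip count
}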
- if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount, - SE.getZero(TripCount->getType()))) { + if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, + SE.getZero(ExitCount->getType()))) { LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); UseLoopGuard |= ForceGuardLoopEntry; } else @@ -399,19 +404,19 @@ Value *HardwareLoop::InitLoopCount() { BasicBlock *Predecessor = BB->getSinglePredecessor(); // If it's not safe to create a while loop then don't force it and create a // do-while loop instead - if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE)) + if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE)) UseLoopGuard = false; else BB = Predecessor; } - if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) { - LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount - << "\n"); + if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { + LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " + << *ExitCount << "\n"); return nullptr; } - Value *Count = SCEVE.expandCodeFor(TripCount, CountType, + Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, BB->getTerminator()); // FIXME: We've expanded Count where we hope to insert the counter setting diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4f730b2cf372..dc245f0d7b16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5133,8 +5133,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) return V; + // TODO: Rewrite this to return a new 'AND' instead of using CombineTo. if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { + VT.getSizeInBits() <= 64 && N0->hasOneUse()) { if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b8a3dd014901..328e9430d635 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3464,8 +3464,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue SatMin = DAG.getConstant(MinVal, dl, VT); SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + // Xor the inputs, if resulting sign bit is 0 the product will be + // positive, else negative. 
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax); Result = DAG.getSelect(dl, VT, Overflow, Result, Product); } else { // For unsigned multiplication, we only need to check the max since we diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a08548393979..bd2ebfd0bd3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8677,8 +8677,10 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); Register TiedReg = R->getReg(); MVT RegVT = R->getSimpleValueType(0); - const TargetRegisterClass *RC = TiedReg.isVirtual() ? - MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg); + const TargetRegisterClass *RC = + TiedReg.isVirtual() ? MRI.getRegClass(TiedReg) + : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT) + : TRI.getMinimalPhysRegClass(TiedReg); unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(MRI.createVirtualRegister(RC)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5e1786958b6f..7f80ce37e28a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8155,8 +8155,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { APInt MaxVal = APInt::getSignedMaxValue(VTSize); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + // Xor the inputs, if resulting sign bit is 0 the product will be + // positive, else negative. + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax); return DAG.getSelect(dl, VT, Overflow, Result, Product); } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) { SDValue Result = diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp index 97d6f8cd8075..efdbc49cdf47 100644 --- a/llvm/lib/Linker/LinkModules.cpp +++ b/llvm/lib/Linker/LinkModules.cpp @@ -177,9 +177,25 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, // Go with Dst. LinkFromSrc = false; break; - case Comdat::SelectionKind::NoDeduplicate: - return emitError("Linking COMDATs named '" + ComdatName + - "': nodeduplicate has been violated!"); + case Comdat::SelectionKind::NoDeduplicate: { + const GlobalVariable *DstGV; + const GlobalVariable *SrcGV; + if (getComdatLeader(DstM, ComdatName, DstGV) || + getComdatLeader(*SrcM, ComdatName, SrcGV)) + return true; + + if (SrcGV->isWeakForLinker()) { + // Go with Dst. + LinkFromSrc = false; + } else if (DstGV->isWeakForLinker()) { + // Go with Src. 
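On the ExpandIntRes_MULFIX and expandFixedPointMul hunks above: when a signed saturating fixed-point multiply overflows, the saturation direction is now taken from the sign of LHS ^ RHS rather than from the sign of the already-wrapped product. A scalar sketch of that rule for a 32-bit value with `scale` fractional bits; this only illustrates the saturation choice, not the full semantics of the llvm.smul.fix.sat intrinsic:

#include <cstdint>
#include <limits>

int32_t smul_fix_sat(int32_t lhs, int32_t rhs, unsigned scale) {
  const int64_t wide = static_cast<int64_t>(lhs) * rhs; // full-width product
  const int64_t shifted = wide >> scale; // drop fractional bits (arithmetic shift)

  if (shifted > std::numeric_limits<int32_t>::max() ||
      shifted < std::numeric_limits<int32_t>::min()) {
    // The truncated product may carry either sign bit, so look at the inputs:
    // opposite signs mean the true product is negative, so saturate downward.
    const bool negative = (lhs ^ rhs) < 0;
    return negative ? std::numeric_limits<int32_t>::min()
                    : std::numeric_limits<int32_t>::max();
  }
  return static_cast<int32_t>(shifted);
}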
+ LinkFromSrc = true; + } else { + return emitError("Linking COMDATs named '" + ComdatName + + "': nodeduplicate has been violated!"); + } + break; + } case Comdat::SelectionKind::ExactMatch: case Comdat::SelectionKind::Largest: case Comdat::SelectionKind::SameSize: { diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 79fcc8569b6d..0bc955dbbfea 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1784,9 +1784,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(GlobalOptPass()); // Garbage collect dead functions. - // FIXME: Add ArgumentPromotion pass after once it's ported. MPM.addPass(GlobalDCEPass()); + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); + FunctionPassManager FPM; // The IPO Passes may leave cruft around. Clean up after them. FPM.addPass(InstCombinePass()); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b27a02b8c182..60c00f47859b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13680,6 +13680,8 @@ static bool isEssentiallyExtractHighSubvector(SDValue N) { N = N.getOperand(0); if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; + if (N.getOperand(0).getValueType().isScalableVector()) + return false; return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() == N.getOperand(0).getValueType().getVectorNumElements() / 2; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 62089166f4b7..00fd374587bc 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -480,7 +480,7 @@ multiclass sme_vector_to_tile_aliases<Instruction inst, MatrixTileVectorOperand tile_ty, ZPRRegOp zpr_ty, Operand imm_ty> { def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn", - (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>; + (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>; } multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> { diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp index d8465f6d682b..94126e179462 100644 --- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp +++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp @@ -117,14 +117,14 @@ struct M68kMemOp { class M68kOperand : public MCParsedAsmOperand { typedef MCParsedAsmOperand Base; - enum class Kind { + enum class KindTy { Invalid, Token, Imm, MemOp, }; - Kind Kind; + KindTy Kind; SMLoc Start, End; union { StringRef Token; @@ -134,7 +134,7 @@ class M68kOperand : public MCParsedAsmOperand { }; public: - M68kOperand(enum Kind Kind, SMLoc Start, SMLoc End) + M68kOperand(KindTy Kind, SMLoc Start, SMLoc End) : Base(), Kind(Kind), Start(Start), End(End) {} SMLoc getStartLoc() const override { return Start; } @@ -143,7 +143,7 @@ public: void print(raw_ostream &OS) const override; bool isMem() const override { return false; } - bool isMemOp() const { return Kind == Kind::MemOp; } + bool isMemOp() const { return Kind == KindTy::MemOp; } static void addExpr(MCInst &Inst, const MCExpr *Expr); @@ -248,7 +248,7 @@ void M68kOperand::addExpr(MCInst &Inst, const MCExpr *Expr) { // Reg bool 
M68kOperand::isReg() const { - return Kind == Kind::MemOp && MemOp.Op == M68kMemOp::Kind::Reg; + return Kind == KindTy::MemOp && MemOp.Op == M68kMemOp::Kind::Reg; } unsigned M68kOperand::getReg() const { @@ -265,13 +265,13 @@ void M68kOperand::addRegOperands(MCInst &Inst, unsigned N) const { std::unique_ptr<M68kOperand> M68kOperand::createMemOp(M68kMemOp MemOp, SMLoc Start, SMLoc End) { - auto Op = std::make_unique<M68kOperand>(Kind::MemOp, Start, End); + auto Op = std::make_unique<M68kOperand>(KindTy::MemOp, Start, End); Op->MemOp = MemOp; return Op; } // Token -bool M68kOperand::isToken() const { return Kind == Kind::Token; } +bool M68kOperand::isToken() const { return Kind == KindTy::Token; } StringRef M68kOperand::getToken() const { assert(isToken()); return Token; @@ -279,13 +279,13 @@ StringRef M68kOperand::getToken() const { std::unique_ptr<M68kOperand> M68kOperand::createToken(StringRef Token, SMLoc Start, SMLoc End) { - auto Op = std::make_unique<M68kOperand>(Kind::Token, Start, End); + auto Op = std::make_unique<M68kOperand>(KindTy::Token, Start, End); Op->Token = Token; return Op; } // Imm -bool M68kOperand::isImm() const { return Kind == Kind::Imm; } +bool M68kOperand::isImm() const { return Kind == KindTy::Imm; } void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const { assert(isImm() && "wrong oeprand kind"); assert((N == 1) && "can only handle one register operand"); @@ -295,7 +295,7 @@ void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const { std::unique_ptr<M68kOperand> M68kOperand::createImm(const MCExpr *Expr, SMLoc Start, SMLoc End) { - auto Op = std::make_unique<M68kOperand>(Kind::Imm, Start, End); + auto Op = std::make_unique<M68kOperand>(KindTy::Imm, Start, End); Op->Expr = Expr; return Op; } @@ -842,19 +842,19 @@ bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode, void M68kOperand::print(raw_ostream &OS) const { switch (Kind) { - case Kind::Invalid: + case KindTy::Invalid: OS << "invalid"; break; - case Kind::Token: + case KindTy::Token: OS << "token '" << Token << "'"; break; - case Kind::Imm: + case KindTy::Imm: OS << "immediate " << Imm; break; - case Kind::MemOp: + case KindTy::MemOp: MemOp.print(OS); break; } diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp index 5b8fd3d41b14..cb7d8f8b25e3 100644 --- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp +++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp @@ -49,10 +49,14 @@ std::string computeDataLayout(const Triple &TT, StringRef CPU, // FIXME how to wire it with the used object format? Ret += "-m:e"; - // M68k pointers are always 32 bit wide even for 16 bit cpus - Ret += "-p:32:32"; - - // M68k requires i8 to align on 2 byte boundry + // M68k pointers are always 32 bit wide even for 16-bit CPUs. + // The ABI only specifies 16-bit alignment. + // On at least the 68020+ with a 32-bit bus, there is a performance benefit + // to having 32-bit alignment. + Ret += "-p:32:16:32"; + + // Bytes do not require special alignment, words are word aligned and + // long words are word aligned at minimum. Ret += "-i8:8:8-i16:16:16-i32:16:32"; // FIXME no floats at the moment diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index a541daaff9f4..207101763ac2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1223,7 +1223,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, // Both of operands are not fixed. 
Set one of commutable // operands to the tied source. CommutableOpIdx1 = 1; - } else if (SrcOpIdx1 == CommutableOpIdx1) { + } else if (SrcOpIdx1 == CommuteAnyOperandIndex) { // Only one of the operands is not fixed. CommutableOpIdx1 = SrcOpIdx2; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 171d59ae4c6b..ae5108b0cb0d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -157,7 +157,7 @@ private: void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB, MachineMemOperand *MMO); unsigned maskI1Value(unsigned Reg, const Value *V); - unsigned getRegForI1Value(const Value *V, bool &Not); + unsigned getRegForI1Value(const Value *V, const BasicBlock *BB, bool &Not); unsigned zeroExtendToI32(unsigned Reg, const Value *V, MVT::SimpleValueType From); unsigned signExtendToI32(unsigned Reg, const Value *V, @@ -418,20 +418,17 @@ unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) { return zeroExtendToI32(Reg, V, MVT::i1); } -unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) { +unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, + const BasicBlock *BB, + bool &Not) { if (const auto *ICmp = dyn_cast<ICmpInst>(V)) if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1))) - if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) { + if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32) && + ICmp->getParent() == BB) { Not = ICmp->isTrueWhenEqual(); return getRegForValue(ICmp->getOperand(0)); } - Value *NotV; - if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) { - Not = true; - return getRegForValue(NotV); - } - Not = false; unsigned Reg = getRegForValue(V); if (Reg == 0) @@ -912,7 +909,8 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) { const auto *Select = cast<SelectInst>(I); bool Not; - unsigned CondReg = getRegForI1Value(Select->getCondition(), Not); + unsigned CondReg = + getRegForI1Value(Select->getCondition(), I->getParent(), Not); if (CondReg == 0) return false; @@ -1312,7 +1310,7 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) { MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)]; bool Not; - unsigned CondReg = getRegForI1Value(Br->getCondition(), Not); + unsigned CondReg = getRegForI1Value(Br->getCondition(), Br->getParent(), Not); if (CondReg == 0) return false; diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 37329b489555..eea848d3eb2f 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -33,6 +33,19 @@ using namespace llvm; namespace { +// Determine if a promotion alias should be created for a symbol name. +static bool allowPromotionAlias(const std::string &Name) { + // Promotion aliases are used only in inline assembly. It's safe to + // simply skip unusual names. Subset of MCAsmInfo::isAcceptableChar() + // and MCAsmInfoXCOFF::isAcceptableChar(). + for (const char &C : Name) { + if (isAlnum(C) || C == '_' || C == '.') + continue; + return false; + } + return true; +} + // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. 
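For the ThinLTOBitcodeWriter.cpp changes here (the allowPromotionAlias helper above and the promoteInternals hunk that follows): when a local function is promoted and renamed with the module id, a module-level `.set` alias under the old name is appended so that references from inline assembly still resolve. A tiny sketch of what that alias amounts to; the helper name is made up, and the real code appends the string via Module::appendModuleInlineAsm as shown below:

#include <string>

// Produce the module-level assembly that keeps inline-asm references to the
// pre-promotion name working; the promoted name is the old name with a
// module-identifier suffix appended.
std::string makePromotionAlias(const std::string &oldName,
                               const std::string &newName) {
  return ".set " + oldName + "," + newName + "\n";
}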
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, @@ -55,6 +68,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, } } + std::string OldName = Name.str(); std::string NewName = (Name + ModuleId).str(); if (const auto *C = ExportGV.getComdat()) @@ -69,6 +83,13 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, ImportGV->setName(NewName); ImportGV->setVisibility(GlobalValue::HiddenVisibility); } + + if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) { + // Create a local alias with the original name to avoid breaking + // references from inline assembly. + std::string Alias = ".set " + OldName + "," + NewName + "\n"; + ExportM.appendModuleInlineAsm(Alias); + } } if (!RenamedComdats.empty()) diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index be21db9087d2..e4ec5f266eb8 100644 --- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -221,6 +221,10 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I, AAPtr = AAPtr->stripPointerCastsSameRepresentation(); AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get()); AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty); + if (!isa<SCEVConstant>(AlignSCEV)) + // Added to suppress a crash because consumer doesn't expect non-constant + // alignments in the assume bundle. TODO: Consider generalizing caller. + return false; if (AlignOB.Inputs.size() == 3) OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get()); else diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3d60e205b002..a153f393448c 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1247,6 +1247,11 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, StoreSize, *AA, Stores); if (UseMemMove) { + // For memmove case it's not enough to guarantee that loop doesn't access + // TheStore and TheLoad. Additionally we need to make sure that TheStore is + // the only user of TheLoad. 
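To make the new memmove restriction just above concrete: if the loaded value has a user other than the store, replacing the overlapping copy loop with one up-front memmove would change what that extra user observes, because the surviving loads would then read memory the memmove has already rewritten. A hypothetical example of such a loop, not taken from the patch or its tests:

#include <cstddef>

// Overlapping "shift left by one byte" copy that also sums the loaded bytes.
// The store p[i] = v matches the memmove idiom, but v has a second user
// (sum += v), so the loop body cannot simply be deleted; emitting the
// memmove ahead of the loop would let the remaining loads see already
// shifted data. Hence the requirement that the store be the load's only user.
long shiftAndSum(unsigned char *p, size_t n) {
  long sum = 0;
  for (size_t i = 0; i + 1 < n; ++i) {
    unsigned char v = p[i + 1]; // load
    p[i] = v;                   // store into the overlapping destination
    sum += v;                   // second user of the loaded value
  }
  return sum;
}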
+ if (!TheLoad->hasOneUse()) + return Changed; Stores.insert(TheLoad); if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, StoreSize, *AA, Stores)) { diff --git a/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/llvm/tools/llvm-cov/CoverageExporterLcov.cpp index 6cf5d9285b90..0096a3d44d85 100644 --- a/llvm/tools/llvm-cov/CoverageExporterLcov.cpp +++ b/llvm/tools/llvm-cov/CoverageExporterLcov.cpp @@ -167,7 +167,7 @@ void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) { void renderBranchSummary(raw_ostream &OS, const FileCoverageSummary &Summary) { OS << "BRF:" << Summary.BranchCoverage.getNumBranches() << '\n' - << "BFH:" << Summary.BranchCoverage.getCovered() << '\n'; + << "BRH:" << Summary.BranchCoverage.getCovered() << '\n'; } void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage, diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 48ae92f734c7..9d461b08f3f8 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1286,6 +1286,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (shouldAdjustVA(Section)) VMAAdjustment = AdjustVMA; + // In executable and shared objects, r_offset holds a virtual address. + // Subtract SectionAddr from the r_offset field of a relocation to get + // the section offset. + uint64_t RelAdjustment = Obj->isRelocatableObject() ? 0 : SectionAddr; uint64_t Size; uint64_t Index; bool PrintedSection = false; @@ -1432,7 +1436,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // For --reloc: print zero blocks patched by relocations, so that // relocations can be shown in the dump. if (RelCur != RelEnd) - MaxOffset = RelCur->getOffset() - Index; + MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index, + MaxOffset); if (size_t N = countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { @@ -1581,7 +1586,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (Obj->getArch() != Triple::hexagon) { // Print relocation for instruction and data. while (RelCur != RelEnd) { - uint64_t Offset = RelCur->getOffset(); + uint64_t Offset = RelCur->getOffset() - RelAdjustment; // If this relocation is hidden, skip it. if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) { ++RelCur; |
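Finally, on the llvm-objdump.cpp hunk at the end of this range: in executables and shared objects a relocation's r_offset holds a virtual address, so the disassembler now rebases it onto the containing section before comparing it with instruction offsets; relocatable objects need no adjustment. A minimal sketch of that adjustment with hypothetical names:

#include <cstdint>

// For ET_REL inputs r_offset is already an offset within the section; for
// ET_EXEC/ET_DYN it is a virtual address and must have the section's load
// address subtracted to become section-relative.
uint64_t relocSectionOffset(uint64_t rOffset, uint64_t sectionAddr,
                            bool isRelocatableObject) {
  const uint64_t relAdjustment = isRelocatableObject ? 0 : sectionAddr;
  return rOffset - relAdjustment;
}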