about summary refs log tree commit diff
path: root/contrib/llvm-project/clang/lib/Driver/Driver.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/lib/Driver/Driver.cpp')
-rw-r--r-- contrib/llvm-project/clang/lib/Driver/Driver.cpp 39
1 files changed, 30 insertions, 9 deletions
diff --git a/contrib/llvm-project/clang/lib/Driver/Driver.cpp b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
index 0da32dae2ef6..3a8400a55741 100644
--- a/contrib/llvm-project/clang/lib/Driver/Driver.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/Driver.cpp
@@ -2930,7 +2930,7 @@ class OffloadingActionBuilder final {
return false;
Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
- options::OPT_fno_gpu_rdc, /*Default=*/false);
+ options::OPT_fno_gpu_rdc, /*Default=*/false);
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
assert(HostTC && "No toolchain for host compilation.");
@@ -3326,7 +3326,7 @@ class OffloadingActionBuilder final {
AssociatedOffloadKind);
if (CompileDeviceOnly && CurPhase == FinalPhase && BundleOutput &&
- BundleOutput.getValue()) {
+ BundleOutput.value()) {
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
OffloadAction::DeviceDependences DDep;
DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I],
@@ -4355,7 +4355,17 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
return KnownArchs.lookup(TC);
llvm::DenseSet<StringRef> Archs;
- for (auto &Arg : Args) {
+ for (auto *Arg : Args) {
+ // Extract any '--[no-]offload-arch' arguments intended for this toolchain.
+ std::unique_ptr<llvm::opt::Arg> ExtractedArg = nullptr;
+ if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) &&
+ ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) {
+ Arg->claim();
+ unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1));
+ ExtractedArg = getOpts().ParseOneArg(Args, Index);
+ Arg = ExtractedArg.get();
+ }
+
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
for (StringRef Arch : llvm::split(Arg->getValue(), ","))
Archs.insert(getCanonicalArchString(C, Args, Arch, TC->getTriple()));
@@ -4425,8 +4435,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
// Get the product of all bound architectures and toolchains.
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
for (const ToolChain *TC : ToolChains)
- for (StringRef Arch : getOffloadArchs(
- C, C.getArgsForToolChain(TC, "generic", Kind), Kind, TC))
+ for (StringRef Arch : getOffloadArchs(C, Args, Kind, TC))
TCAndArchs.push_back(std::make_pair(TC, Arch));
for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
@@ -4477,11 +4486,23 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
if (offloadDeviceOnly())
return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing);
- Action *OffloadPackager =
- C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image);
OffloadAction::DeviceDependences DDep;
- DDep.add(*OffloadPackager, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
- nullptr, Action::OFK_None);
+ if (C.isOffloadingHostKind(Action::OFK_Cuda) &&
+ !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) {
+ // If we are not in RDC-mode we just emit the final CUDA fatbinary for each
+ // translation unit without requiring any linking.
+ Action *FatbinAction =
+ C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN);
+ DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
+ nullptr, Action::OFK_Cuda);
+ } else {
+ // Package all the offloading actions into a single output that can be
+ // embedded in the host and linked.
+ Action *PackagerAction =
+ C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image);
+ DDep.add(*PackagerAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+ nullptr, Action::OFK_None);
+ }
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
/*BoundArch=*/nullptr, isa<CompileJobAction>(HostAction) ? DDep : DDeps);