diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Driver/Driver.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/Driver/Driver.cpp | 39 |
1 file changed, 30 insertions, 9 deletions
diff --git a/contrib/llvm-project/clang/lib/Driver/Driver.cpp b/contrib/llvm-project/clang/lib/Driver/Driver.cpp index 0da32dae2ef6..3a8400a55741 100644 --- a/contrib/llvm-project/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm-project/clang/lib/Driver/Driver.cpp @@ -2930,7 +2930,7 @@ class OffloadingActionBuilder final { return false; Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, - options::OPT_fno_gpu_rdc, /*Default=*/false); + options::OPT_fno_gpu_rdc, /*Default=*/false); const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); assert(HostTC && "No toolchain for host compilation."); @@ -3326,7 +3326,7 @@ class OffloadingActionBuilder final { AssociatedOffloadKind); if (CompileDeviceOnly && CurPhase == FinalPhase && BundleOutput && - BundleOutput.getValue()) { + BundleOutput.value()) { for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { OffloadAction::DeviceDependences DDep; DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], @@ -4355,7 +4355,17 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, return KnownArchs.lookup(TC); llvm::DenseSet<StringRef> Archs; - for (auto &Arg : Args) { + for (auto *Arg : Args) { + // Extract any '--[no-]offload-arch' arguments intended for this toolchain. + std::unique_ptr<llvm::opt::Arg> ExtractedArg = nullptr; + if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && + ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { + Arg->claim(); + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } + if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : llvm::split(Arg->getValue(), ",")) Archs.insert(getCanonicalArchString(C, Args, Arch, TC->getTriple())); @@ -4425,8 +4435,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Get the product of all bound architectures and toolchains. 
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs; for (const ToolChain *TC : ToolChains) - for (StringRef Arch : getOffloadArchs( - C, C.getArgsForToolChain(TC, "generic", Kind), Kind, TC)) + for (StringRef Arch : getOffloadArchs(C, Args, Kind, TC)) TCAndArchs.push_back(std::make_pair(TC, Arch)); for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I) @@ -4477,11 +4486,23 @@ Action *Driver::BuildOffloadingActions(Compilation &C, if (offloadDeviceOnly()) return C.MakeAction<OffloadAction>(DDeps, types::TY_Nothing); - Action *OffloadPackager = - C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image); OffloadAction::DeviceDependences DDep; - DDep.add(*OffloadPackager, *C.getSingleOffloadToolChain<Action::OFK_Host>(), - nullptr, Action::OFK_None); + if (C.isOffloadingHostKind(Action::OFK_Cuda) && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) { + // If we are not in RDC-mode we just emit the final CUDA fatbinary for each + // translation unit without requiring any linking. + Action *FatbinAction = + C.MakeAction<LinkJobAction>(OffloadActions, types::TY_CUDA_FATBIN); + DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_Cuda>(), + nullptr, Action::OFK_Cuda); + } else { + // Package all the offloading actions into a single output that can be + // embedded in the host and linked. + Action *PackagerAction = + C.MakeAction<OffloadPackagerJobAction>(OffloadActions, types::TY_Image); + DDep.add(*PackagerAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + nullptr, Action::OFK_None); + } OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), /*BoundArch=*/nullptr, isa<CompileJobAction>(HostAction) ? DDep : DDeps); |