1 files changed, 230 insertions, 137 deletions
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index 180c412bd791..4f8481c0beec 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -11,6 +11,7 @@
 #include "InputInfo.h"
 #include "ToolChains.h"
 #include "clang/Basic/Version.h"
+#include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Config/config.h"
 #include "clang/Driver/Action.h"
 #include "clang/Driver/Compilation.h"
@@ -46,9 +47,11 @@ using namespace clang;
 using namespace llvm::opt;
 
 Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
-               DiagnosticsEngine &Diags)
-    : Opts(createDriverOptTable()), Diags(Diags), Mode(GCCMode),
-      SaveTemps(SaveTempsNone), ClangExecutable(ClangExecutable),
+               DiagnosticsEngine &Diags,
+               IntrusiveRefCntPtr<vfs::FileSystem> VFS)
+    : Opts(createDriverOptTable()), Diags(Diags), VFS(VFS), Mode(GCCMode),
+      SaveTemps(SaveTempsNone), LTOMode(LTOK_None),
+      ClangExecutable(ClangExecutable),
       SysRoot(DEFAULT_SYSROOT), UseStdLib(true),
       DefaultTargetTriple(DefaultTargetTriple),
       DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr),
@@ -57,8 +60,13 @@ Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
       CCGenDiagnostics(false), CCCGenericGCCName(""), CheckInputsExist(true),
       CCCUsePCH(true), SuppressMissingInputWarning(false) {
 
+  // Provide a sane fallback if no VFS is specified.
+  if (!this->VFS)
+    this->VFS = vfs::getRealFileSystem();
+
   Name = llvm::sys::path::filename(ClangExecutable);
   Dir = llvm::sys::path::parent_path(ClangExecutable);
+  InstalledDir = Dir; // Provide a sensible default installed dir.
 
   // Compute the path to the resource directory.
   StringRef ClangResourceDir(CLANG_RESOURCE_DIR);
@@ -174,10 +182,8 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
   } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) {
     FinalPhase = phases::Backend;
 
-    // -c and partial CUDA compilations only run up to the assembler.
-  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) ||
-             (PhaseArg = DAL.getLastArg(options::OPT_cuda_device_only)) ||
-             (PhaseArg = DAL.getLastArg(options::OPT_cuda_host_only))) {
+    // -c compilation only runs up to the assembler.
+  } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
     FinalPhase = phases::Assemble;
 
     // Otherwise do everything.
@@ -203,6 +209,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
   DerivedArgList *DAL = new DerivedArgList(Args);
 
   bool HasNostdlib = Args.hasArg(options::OPT_nostdlib);
+  bool HasNodefaultlib = Args.hasArg(options::OPT_nodefaultlibs);
   for (Arg *A : Args) {
     // Unfortunately, we have to parse some forwarding options (-Xassembler,
     // -Xlinker, -Xpreprocessor) because we either integrate their functionality
@@ -217,7 +224,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
       DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_Xlinker__no_demangle));
 
       // Add the remaining values as Xlinker arguments.
-      for (const StringRef Val : A->getValues())
+      for (StringRef Val : A->getValues())
         if (Val != "--no-demangle")
           DAL->AddSeparateArg(A, Opts->getOption(options::OPT_Xlinker), Val);
 
@@ -246,7 +253,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
       StringRef Value = A->getValue();
 
       // Rewrite unless -nostdlib is present.
-      if (!HasNostdlib && Value == "stdc++") {
+      if (!HasNostdlib && !HasNodefaultlib && Value == "stdc++") {
         DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_stdcxx));
         continue;
       }
@@ -261,7 +268,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
     // Pick up inputs via the -- option.
     if (A->getOption().matches(options::OPT__DASH_DASH)) {
       A->claim();
-      for (const StringRef Val : A->getValues())
+      for (StringRef Val : A->getValues())
         DAL->append(MakeInputArg(*DAL, Opts, Val));
       continue;
     }
@@ -327,7 +334,8 @@ static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
   }
 
   // Skip further flag support on OSes which don't support '-m32' or '-m64'.
-  if (Target.getArchName() == "tce" || Target.getOS() == llvm::Triple::Minix)
+  if (Target.getArch() == llvm::Triple::tce ||
+      Target.getOS() == llvm::Triple::Minix)
     return Target;
 
   // Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
@@ -360,6 +368,32 @@ static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
   return Target;
 }
 
+// \brief Parse the LTO options and record the type of LTO compilation
+// based on which -f(no-)?lto(=.*)? option occurs last.
+void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
+  LTOMode = LTOK_None;
+  if (!Args.hasFlag(options::OPT_flto, options::OPT_flto_EQ,
+                    options::OPT_fno_lto, false))
+    return;
+
+  StringRef LTOName("full");
+
+  const Arg *A = Args.getLastArg(options::OPT_flto_EQ);
+  if (A)
+    LTOName = A->getValue();
+
+  LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
+                .Case("full", LTOK_Full)
+                .Case("thin", LTOK_Thin)
+                .Default(LTOK_Unknown);
+
+  if (LTOMode == LTOK_Unknown) {
+    assert(A);
+    Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName()
+                                                    << A->getValue();
+  }
+}
+
 Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   llvm::PrettyStackTraceString CrashInfo("Compilation construction");
 
@@ -387,6 +421,9 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
 
   InputArgList Args = ParseArgStrings(ArgList.slice(1));
 
+  // Silence driver warnings if requested
+  Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
+
   // -no-canonical-prefixes is used very early in main.
   Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
 
@@ -411,6 +448,7 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
     // clang-cl targets MSVC-style Win32.
     llvm::Triple T(DefaultTargetTriple);
     T.setOS(llvm::Triple::Win32);
+    T.setVendor(llvm::Triple::PC);
     T.setEnvironment(llvm::Triple::MSVC);
     DefaultTargetTriple = T.str();
   }
@@ -439,6 +477,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
                     .Default(SaveTempsCwd);
   }
 
+  setLTOMode(Args);
+
   std::unique_ptr<llvm::opt::InputArgList> UArgs =
       llvm::make_unique<InputArgList>(std::move(Args));
 
@@ -452,6 +492,10 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   // The compilation takes ownership of Args.
   Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
 
+  C->setCudaDeviceToolChain(
+      &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
+                                                   ? "nvptx64-nvidia-cuda"
+                                                   : "nvptx-nvidia-cuda")));
   if (!HandleImmediateArgs(*C))
     return C;
 
@@ -462,10 +506,9 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   // Construct the list of abstract actions to perform for this compilation. On
   // MachO targets this uses the driver-driver and universal actions.
   if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
-                          C->getActions());
+    BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
   else
-    BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
+    BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs,
                  C->getActions());
 
   if (CCCPrintPhases) {
@@ -578,9 +621,9 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
   // Darwin OSes this uses the driver-driver and builds universal actions.
   const ToolChain &TC = C.getDefaultToolChain();
   if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildUniversalActions(C, TC, Inputs);
   else
-    BuildActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildActions(C, TC, C.getArgs(), Inputs, C.getActions());
 
   BuildJobs(C);
 
@@ -761,6 +804,9 @@ void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
   } else
     OS << "Thread model: " << TC.getThreadModel();
   OS << '\n';
+
+  // Print out the install directory.
+  OS << "InstalledDir: " << InstalledDir << '\n';
 }
 
 /// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
@@ -906,7 +952,7 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
     os << '"' << CDA->getGpuArchName() << '"' << ", {"
        << PrintActions1(C, *CDA->begin(), Ids) << "}";
   } else {
-    ActionList *AL;
+    const ActionList *AL;
     if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
       os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
          << ", gpu binaries ";
@@ -914,12 +960,15 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
     } else
       AL = &A->getInputs();
 
-    const char *Prefix = "{";
-    for (Action *PreRequisite : *AL) {
-      os << Prefix << PrintActions1(C, PreRequisite, Ids);
-      Prefix = ", ";
-    }
-    os << "}";
+    if (AL->size()) {
+      const char *Prefix = "{";
+      for (Action *PreRequisite : *AL) {
+        os << Prefix << PrintActions1(C, PreRequisite, Ids);
+        Prefix = ", ";
+      }
+      os << "}";
+    } else
+      os << "{}";
   }
 
   unsigned Id = Ids.size();
@@ -945,16 +994,17 @@ static bool ContainsCompileOrAssembleAction(const Action *A) {
       isa<AssembleJobAction>(A))
     return true;
 
-  for (Action::const_iterator it = A->begin(), ie = A->end(); it != ie; ++it)
-    if (ContainsCompileOrAssembleAction(*it))
+  for (const Action *Input : *A)
+    if (ContainsCompileOrAssembleAction(Input))
       return true;
 
   return false;
 }
 
-void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
-                                   const InputList &BAInputs,
-                                   ActionList &Actions) const {
+void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
+                                   const InputList &BAInputs) const {
+  DerivedArgList &Args = C.getArgs();
+  ActionList &Actions = C.getActions();
   llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
   // Collect the list of architectures. Duplicates are allowed, but should only
   // be handled once (in the order seen).
@@ -983,13 +1033,11 @@ void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
     Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
 
   ActionList SingleActions;
-  BuildActions(TC, Args, BAInputs, SingleActions);
+  BuildActions(C, TC, Args, BAInputs, SingleActions);
 
   // Add in arch bindings for every top level action, as well as lipo and
   // dsymutil steps if needed.
-  for (unsigned i = 0, e = SingleActions.size(); i != e; ++i) {
-    Action *Act = SingleActions[i];
-
+  for (Action* Act : SingleActions) {
     // Make sure we can lipo this kind of output. If not (and it is an actual
     // output) then we disallow, since we can't create an output file with the
     // right name without overwriting it. We could remove this oddity by just
@@ -1228,18 +1276,23 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
   }
 }
 
-// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE input
-// action and then wraps each in CudaDeviceAction paired with appropriate GPU
-// arch name. If we're only building device-side code, each action remains
-// independent. Otherwise we pass device-side actions as inputs to a new
-// CudaHostAction which combines both host and device side actions.
+// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE
+// input action and then wraps each in CudaDeviceAction paired with
+// appropriate GPU arch name. In case of partial (i.e preprocessing
+// only) or device-only compilation, each device action is added to /p
+// Actions and /p Current is released. Otherwise the function creates
+// and returns a new CudaHostAction which wraps /p Current and device
+// side actions.
 static std::unique_ptr<Action>
-buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
-                 const Arg *InputArg, const types::ID InputType,
-                 std::unique_ptr<Action> Current, ActionList &Actions) {
-
-  assert(InputType == types::TY_CUDA &&
-         "CUDA Actions only apply to CUDA inputs.");
+buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg,
+                 std::unique_ptr<Action> HostAction, ActionList &Actions) {
+  Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only,
+                                               options::OPT_cuda_device_only);
+  // Host-only compilation case.
+  if (PartialCompilationArg &&
+      PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only))
+    return std::unique_ptr<Action>(
+        new CudaHostAction(std::move(HostAction), {}));
 
   // Collect all cuda_gpu_arch parameters, removing duplicates.
   SmallVector<const char *, 4> GpuArchList;
@@ -1259,20 +1312,22 @@ buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
 
   // Replicate inputs for each GPU architecture.
   Driver::InputList CudaDeviceInputs;
-  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
+  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
     CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
 
   // Build actions for all device inputs.
+  assert(C.getCudaDeviceToolChain() &&
+         "Missing toolchain for device-side compilation.");
   ActionList CudaDeviceActions;
-  D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
+  C.getDriver().BuildActions(C, *C.getCudaDeviceToolChain(), Args,
+                             CudaDeviceInputs, CudaDeviceActions);
   assert(GpuArchList.size() == CudaDeviceActions.size() &&
          "Failed to create actions for all devices");
 
   // Check whether any of device actions stopped before they could generate PTX.
   bool PartialCompilation = false;
-  bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only);
-  for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) {
-    if (CudaDeviceActions[i]->getKind() != Action::BackendJobClass) {
+  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+    if (CudaDeviceActions[I]->getKind() != Action::BackendJobClass) {
       PartialCompilation = true;
       break;
     }
@@ -1280,6 +1335,7 @@ buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
 
   // Figure out what to do with device actions -- pass them as inputs to the
   // host action or run each of them independently.
+  bool DeviceOnlyCompilation = PartialCompilationArg != nullptr;
   if (PartialCompilation || DeviceOnlyCompilation) {
     // In case of partial or device-only compilation results of device actions
     // are not consumed by the host action device actions have to be added to
@@ -1288,35 +1344,37 @@ buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
     // -o is ambiguous if we have more than one top-level action.
     if (Args.hasArg(options::OPT_o) &&
         (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
-      D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
+      C.getDriver().Diag(
+          clang::diag::err_drv_output_argument_with_multiple_files);
       return nullptr;
     }
 
-    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
-      Actions.push_back(
-          new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
-                               GpuArchList[i], /* AtTopLevel */ true));
+    for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
+      Actions.push_back(new CudaDeviceAction(
+          std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
+          /* AtTopLevel */ true));
     // Kill host action in case of device-only compilation.
     if (DeviceOnlyCompilation)
-      Current.reset(nullptr);
-    return Current;
-  } else {
-    // Outputs of device actions during complete CUDA compilation get created
-    // with AtTopLevel=false and become inputs for the host action.
-    ActionList DeviceActions;
-    for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i)
-      DeviceActions.push_back(
-          new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]),
-                               GpuArchList[i], /* AtTopLevel */ false));
-    // Return a new host action that incorporates original host action and all
-    // device actions.
-    return std::unique_ptr<Action>(
-        new CudaHostAction(std::move(Current), DeviceActions));
-  }
+      HostAction.reset(nullptr);
+    return HostAction;
+  }
+
+  // Outputs of device actions during complete CUDA compilation get created
+  // with AtTopLevel=false and become inputs for the host action.
+  ActionList DeviceActions;
+  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
+    DeviceActions.push_back(new CudaDeviceAction(
+        std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
+        /* AtTopLevel */ false));
+  // Return a new host action that incorporates original host action and all
+  // device actions.
+  return std::unique_ptr<Action>(
+      new CudaHostAction(std::move(HostAction), DeviceActions));
 }
 
-void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
-                          const InputList &Inputs, ActionList &Actions) const {
+void Driver::BuildActions(Compilation &C, const ToolChain &TC,
+                          DerivedArgList &Args, const InputList &Inputs,
+                          ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
 
   if (!SuppressMissingInputWarning && Inputs.empty()) {
@@ -1373,9 +1431,9 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
   ActionList LinkerInputs;
 
   llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PL;
-  for (unsigned i = 0, e = Inputs.size(); i != e; ++i) {
-    types::ID InputType = Inputs[i].first;
-    const Arg *InputArg = Inputs[i].second;
+  for (auto &I : Inputs) {
+    types::ID InputType = I.first;
+    const Arg *InputArg = I.second;
 
     PL.clear();
     types::getCompilationPhases(InputType, PL);
@@ -1412,24 +1470,12 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
       continue;
     }
 
-    phases::ID CudaInjectionPhase;
-    if (isSaveTempsEnabled()) {
-      // All phases are done independently, inject GPU blobs during compilation
-      // phase as that's where we generate glue code to init them.
-      CudaInjectionPhase = phases::Compile;
-    } else {
-      // Assumes that clang does everything up until linking phase, so we inject
-      // cuda device actions at the last step before linking. Otherwise CUDA
-      // host action forces preprocessor into a separate invocation.
-      if (FinalPhase == phases::Link) {
-        for (auto i = PL.begin(), e = PL.end(); i != e; ++i) {
-          auto next = i + 1;
-          if (next != e && *next == phases::Link)
-            CudaInjectionPhase = *i;
-        }
-      } else
-        CudaInjectionPhase = FinalPhase;
-    }
+    phases::ID CudaInjectionPhase = FinalPhase;
+    for (const auto &Phase : PL)
+      if (Phase <= FinalPhase && Phase == phases::Compile) {
+        CudaInjectionPhase = Phase;
+        break;
+      }
 
     // Build the pipeline for this file.
     std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType));
@@ -1457,10 +1503,9 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
       // Otherwise construct the appropriate action.
       Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
 
-      if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase &&
-          !Args.hasArg(options::OPT_cuda_host_only)) {
-        Current = buildCudaActions(*this, TC, Args, InputArg, InputType,
-                                   std::move(Current), Actions);
+      if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
+        Current =
+            buildCudaActions(C, Args, InputArg, std::move(Current), Actions);
         if (!Current)
           break;
       }
@@ -1487,6 +1532,10 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
 
   // Claim ignored clang-cl options.
   Args.ClaimAllArgs(options::OPT_cl_ignored_Group);
+
+  // Claim --cuda-host-only arg which may be passed to non-CUDA
+  // compilations and should not trigger warnings there.
+  Args.ClaimAllArgs(options::OPT_cuda_host_only);
 }
 
 std::unique_ptr<Action>
@@ -1551,7 +1600,7 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args,
                                                types::TY_LLVM_BC);
   }
   case phases::Backend: {
-    if (IsUsingLTO(Args)) {
+    if (isUsingLTO()) {
       types::ID Output =
           Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
       return llvm::make_unique<BackendJobAction>(std::move(Input), Output);
@@ -1572,10 +1621,6 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args,
   llvm_unreachable("invalid phase in ConstructPhaseAction");
 }
 
-bool Driver::IsUsingLTO(const ArgList &Args) const {
-  return Args.hasFlag(options::OPT_flto, options::OPT_fno_lto, false);
-}
-
 void Driver::BuildJobs(Compilation &C) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
 
@@ -1668,10 +1713,17 @@ void Driver::BuildJobs(Compilation &C) const {
   }
 }
 
-static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
+// Returns a Tool for a given JobAction.  In case the action and its
+// predecessors can be combined, updates Inputs with the inputs of the
+// first combined action. If one of the collapsed actions is a
+// CudaHostAction, updates CollapsedCHA with the pointer to it so the
+// caller can deal with extra handling such action requires.
+static const Tool *selectToolForJob(Compilation &C, bool SaveTemps,
                                     const ToolChain *TC, const JobAction *JA,
-                                    const ActionList *&Inputs) {
+                                    const ActionList *&Inputs,
+                                    const CudaHostAction *&CollapsedCHA) {
   const Tool *ToolForJob = nullptr;
+  CollapsedCHA = nullptr;
 
   // See if we should look for a compiler with an integrated assembler. We match
   // bottom up, so what we are actually looking for is an assembler job with a
@@ -1688,13 +1740,19 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
     // checking the backend tool, check if the tool for the CompileJob
     // has an integrated assembler.
     const ActionList *BackendInputs = &(*Inputs)[0]->getInputs();
-    JobAction *CompileJA = cast<CompileJobAction>(*BackendInputs->begin());
+    // Compile job may be wrapped in CudaHostAction, extract it if
+    // that's the case and update CollapsedCHA if we combine phases.
+    CudaHostAction *CHA = dyn_cast<CudaHostAction>(*BackendInputs->begin());
+    JobAction *CompileJA =
+        cast<CompileJobAction>(CHA ? *CHA->begin() : *BackendInputs->begin());
+    assert(CompileJA && "Backend job is not preceeded by compile job.");
     const Tool *Compiler = TC->SelectTool(*CompileJA);
     if (!Compiler)
       return nullptr;
     if (Compiler->hasIntegratedAssembler()) {
-      Inputs = &(*BackendInputs)[0]->getInputs();
+      Inputs = &CompileJA->getInputs();
       ToolForJob = Compiler;
+      CollapsedCHA = CHA;
     }
   }
 
@@ -1704,19 +1762,19 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
   if (isa<BackendJobAction>(JA)) {
     // Check if the compiler supports emitting LLVM IR.
     assert(Inputs->size() == 1);
-    JobAction *CompileJA;
-    // Extract real host action, if it's a CudaHostAction.
-    if (CudaHostAction *CudaHA = dyn_cast<CudaHostAction>(*Inputs->begin()))
-      CompileJA = cast<CompileJobAction>(*CudaHA->begin());
-    else
-      CompileJA = cast<CompileJobAction>(*Inputs->begin());
-
+    // Compile job may be wrapped in CudaHostAction, extract it if
+    // that's the case and update CollapsedCHA if we combine phases.
+    CudaHostAction *CHA = dyn_cast<CudaHostAction>(*Inputs->begin());
+    JobAction *CompileJA =
+        cast<CompileJobAction>(CHA ? *CHA->begin() : *Inputs->begin());
+    assert(CompileJA && "Backend job is not preceeded by compile job.");
     const Tool *Compiler = TC->SelectTool(*CompileJA);
     if (!Compiler)
       return nullptr;
     if (!Compiler->canEmitIR() || !SaveTemps) {
-      Inputs = &(*Inputs)[0]->getInputs();
+      Inputs = &CompileJA->getInputs();
       ToolForJob = Compiler;
+      CollapsedCHA = CHA;
     }
   }
 
@@ -1749,7 +1807,7 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
     InputInfo II;
     // Append outputs of device jobs to the input list.
     for (const Action *DA : CHA->getDeviceActions()) {
-      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
+      BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel,
                          /*MultipleArchs*/ false, LinkingOutput, II);
       CudaDeviceInputInfos.push_back(II);
     }
@@ -1789,13 +1847,10 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
   }
 
   if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
-    // Figure out which NVPTX triple to use for device-side compilation based on
-    // whether host is 64-bit.
-    llvm::Triple DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit()
-                                  ? "nvptx64-nvidia-cuda"
-                                  : "nvptx-nvidia-cuda");
-    BuildJobsForAction(C, *CDA->begin(),
-                       &getToolChain(C.getArgs(), DeviceTriple),
+    // Initial processing of CudaDeviceAction carries host params.
+    // Call BuildJobsForAction() again, now with correct device parameters.
+    assert(CDA->getGpuArchName() && "No GPU name in device action.");
+    BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(),
                        CDA->getGpuArchName(), CDA->isAtTopLevel(),
                        /*MultipleArchs*/ true, LinkingOutput, Result);
     return;
@@ -1804,10 +1859,23 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
   const ActionList *Inputs = &A->getInputs();
 
   const JobAction *JA = cast<JobAction>(A);
-  const Tool *T = SelectToolForJob(C, isSaveTempsEnabled(), TC, JA, Inputs);
+  const CudaHostAction *CollapsedCHA = nullptr;
+  const Tool *T =
+      selectToolForJob(C, isSaveTempsEnabled(), TC, JA, Inputs, CollapsedCHA);
   if (!T)
     return;
 
+  // If we've collapsed action list that contained CudaHostAction we
+  // need to build jobs for device-side inputs it may have held.
+  if (CollapsedCHA) {
+    InputInfo II;
+    for (const Action *DA : CollapsedCHA->getDeviceActions()) {
+      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
+                         /*MultipleArchs*/ false, LinkingOutput, II);
+      CudaDeviceInputInfos.push_back(II);
+    }
+  }
+
   // Only use pipes when there is exactly one input.
   InputInfoList InputInfos;
   for (const Action *Input : *Inputs) {
@@ -2091,6 +2159,11 @@ void Driver::generatePrefixedToolNames(
   // FIXME: Needs a better variable than DefaultTargetTriple
   Names.emplace_back(DefaultTargetTriple + "-" + Tool);
   Names.emplace_back(Tool);
+
+  // Allow the discovery of tools prefixed with LLVM's default target triple.
+  std::string LLVMDefaultTargetTriple = llvm::sys::getDefaultTargetTriple();
+  if (LLVMDefaultTargetTriple != DefaultTargetTriple)
+    Names.emplace_back(LLVMDefaultTargetTriple + "-" + Tool);
 }
 
 static bool ScanDirForExecutable(SmallString<128> &Dir,
@@ -2163,6 +2236,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
     case llvm::Triple::Darwin:
     case llvm::Triple::MacOSX:
     case llvm::Triple::IOS:
+    case llvm::Triple::TvOS:
+    case llvm::Triple::WatchOS:
       TC = new toolchains::DarwinClang(*this, Target, Args);
       break;
     case llvm::Triple::DragonFly:
@@ -2185,16 +2260,22 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
       break;
     case llvm::Triple::Linux:
       if (Target.getArch() == llvm::Triple::hexagon)
-        TC = new toolchains::Hexagon_TC(*this, Target, Args);
+        TC = new toolchains::HexagonToolChain(*this, Target, Args);
+      else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) &&
+               !Target.hasEnvironment())
+        TC = new toolchains::MipsLLVMToolChain(*this, Target, Args);
       else
         TC = new toolchains::Linux(*this, Target, Args);
       break;
     case llvm::Triple::NaCl:
-      TC = new toolchains::NaCl_TC(*this, Target, Args);
+      TC = new toolchains::NaClToolChain(*this, Target, Args);
       break;
     case llvm::Triple::Solaris:
       TC = new toolchains::Solaris(*this, Target, Args);
       break;
+    case llvm::Triple::AMDHSA:
+      TC = new toolchains::AMDGPUToolChain(*this, Target, Args);
+      break;
     case llvm::Triple::Win32:
       switch (Target.getEnvironment()) {
       default:
@@ -2220,24 +2301,36 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
     case llvm::Triple::CUDA:
       TC = new toolchains::CudaToolChain(*this, Target, Args);
       break;
+    case llvm::Triple::PS4:
+      TC = new toolchains::PS4CPU(*this, Target, Args);
+      break;
     default:
       // Of these targets, Hexagon is the only one that might have
       // an OS of Linux, in which case it got handled above already.
-      if (Target.getArchName() == "tce")
+      switch (Target.getArch()) {
+      case llvm::Triple::tce:
         TC = new toolchains::TCEToolChain(*this, Target, Args);
-      else if (Target.getArch() == llvm::Triple::hexagon)
-        TC = new toolchains::Hexagon_TC(*this, Target, Args);
-      else if (Target.getArch() == llvm::Triple::xcore)
-        TC = new toolchains::XCore(*this, Target, Args);
-      else if (Target.getArch() == llvm::Triple::shave)
-        TC = new toolchains::SHAVEToolChain(*this, Target, Args);
-      else if (Target.isOSBinFormatELF())
-        TC = new toolchains::Generic_ELF(*this, Target, Args);
-      else if (Target.isOSBinFormatMachO())
-        TC = new toolchains::MachO(*this, Target, Args);
-      else
-        TC = new toolchains::Generic_GCC(*this, Target, Args);
-      break;
+        break;
+      case llvm::Triple::hexagon:
+        TC = new toolchains::HexagonToolChain(*this, Target, Args);
+        break;
+      case llvm::Triple::xcore:
+        TC = new toolchains::XCoreToolChain(*this, Target, Args);
+        break;
+      case llvm::Triple::wasm32:
+      case llvm::Triple::wasm64:
+        TC = new toolchains::WebAssembly(*this, Target, Args);
+        break;
+      default:
+        if (Target.getVendor() == llvm::Triple::Myriad)
+          TC = new toolchains::MyriadToolChain(*this, Target, Args);
+        else if (Target.isOSBinFormatELF())
+          TC = new toolchains::Generic_ELF(*this, Target, Args);
+        else if (Target.isOSBinFormatMachO())
+          TC = new toolchains::MachO(*this, Target, Args);
+        else
+          TC = new toolchains::Generic_GCC(*this, Target, Args);
+      }
     }
   }
   return *TC;