aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/LTO
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/LTO')
-rw-r--r--llvm/lib/LTO/Caching.cpp1
-rw-r--r--llvm/lib/LTO/LTO.cpp75
-rw-r--r--llvm/lib/LTO/LTOBackend.cpp240
-rw-r--r--llvm/lib/LTO/LTOCodeGenerator.cpp54
-rw-r--r--llvm/lib/LTO/LTOModule.cpp3
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp68
6 files changed, 322 insertions, 119 deletions
diff --git a/llvm/lib/LTO/Caching.cpp b/llvm/lib/LTO/Caching.cpp
index 46cac3fb1830..75a89e729f43 100644
--- a/llvm/lib/LTO/Caching.cpp
+++ b/llvm/lib/LTO/Caching.cpp
@@ -13,6 +13,7 @@
#include "llvm/LTO/Caching.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 6e1e3998e490..9103d11059e0 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -798,7 +798,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
for (GlobalValue *GV : Mod.Keep) {
if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) {
if (Function *F = dyn_cast<Function>(GV)) {
- OptimizationRemarkEmitter ORE(F);
+ OptimizationRemarkEmitter ORE(F, nullptr);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
<< ore::NV("Function", F)
<< " not added to the combined module ");
@@ -983,7 +983,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename,
- Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness);
+ Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness,
+ Conf.RemarksHotnessThreshold);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
@@ -1107,6 +1108,7 @@ public:
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
virtual Error wait() = 0;
+ virtual unsigned getThreadCount() = 0;
};
namespace {
@@ -1221,6 +1223,10 @@ public:
else
return Error::success();
}
+
+ unsigned getThreadCount() override {
+ return BackendThreadPool.getThreadCount();
+ }
};
} // end anonymous namespace
@@ -1309,6 +1315,10 @@ public:
}
Error wait() override { return Error::success(); }
+
+ // WriteIndexesThinBackend should always return 1 to prevent module
+ // re-ordering and avoid non-determinism in the final link.
+ unsigned getThreadCount() override { return 1; }
};
} // end anonymous namespace
@@ -1443,23 +1453,44 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
auto &ModuleMap =
ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
- // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
- // module and parallel code generation partitions.
- unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
- for (auto &Mod : ModuleMap) {
- if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
- ExportLists[Mod.first],
- ResolvedODR[Mod.first], ThinLTO.ModuleMap))
- return E;
- ++Task;
- }
+ auto ProcessOneModule = [&](int I) -> Error {
+ auto &Mod = *(ModuleMap.begin() + I);
+ // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for
+ // combined module and parallel code generation partitions.
+ return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I,
+ Mod.second, ImportLists[Mod.first],
+ ExportLists[Mod.first], ResolvedODR[Mod.first],
+ ThinLTO.ModuleMap);
+ };
+ if (BackendProc->getThreadCount() == 1) {
+ // Process the modules in the order they were provided on the command-line.
+ // It is important for this codepath to be used for WriteIndexesThinBackend,
+ // to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same
+ // order as the inputs, which otherwise would affect the final link order.
+ for (int I = 0, E = ModuleMap.size(); I != E; ++I)
+ if (Error E = ProcessOneModule(I))
+ return E;
+ } else {
+ // When executing in parallel, process largest bitsize modules first to
+ // improve parallelism, and avoid starving the thread pool near the end.
+ // This saves about 15 sec on a 36-core machine while link `clang.exe` (out
+ // of 100 sec).
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(ModuleMap.size());
+ for (auto &Mod : ModuleMap)
+ ModulesVec.push_back(&Mod.second);
+ for (int I : generateModulesOrdering(ModulesVec))
+ if (Error E = ProcessOneModule(I))
+ return E;
+ }
return BackendProc->wait();
}
Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
- StringRef RemarksFormat, bool RemarksWithHotness, int Count) {
+ StringRef RemarksFormat, bool RemarksWithHotness,
+ Optional<uint64_t> RemarksHotnessThreshold, int Count) {
std::string Filename = std::string(RemarksFilename);
// For ThinLTO, file.opt.<format> becomes
// file.opt.<format>.thin.<num>.<format>.
@@ -1469,7 +1500,8 @@ Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
.str();
auto ResultOrErr = llvm::setupLLVMOptimizationRemarks(
- Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness);
+ Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness,
+ RemarksHotnessThreshold);
if (Error E = ResultOrErr.takeError())
return std::move(E);
@@ -1495,3 +1527,18 @@ lto::setupStatsFile(StringRef StatsFilename) {
StatsFile->keep();
return std::move(StatsFile);
}
+
+// Compute the ordering we will process the inputs: the rough heuristic here
+// is to sort them per size so that the largest module get schedule as soon as
+// possible. This is purely a compile-time optimization.
+std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
+ std::vector<int> ModulesOrdering;
+ ModulesOrdering.resize(R.size());
+ std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+ llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+ auto LSize = R[LeftIndex]->getBuffer().size();
+ auto RSize = R[RightIndex]->getBuffer().size();
+ return LSize > RSize;
+ });
+ return ModulesOrdering;
+}
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 0c395f9bbf28..1796d6ba60cc 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -50,6 +50,30 @@
using namespace llvm;
using namespace lto;
+#define DEBUG_TYPE "lto-backend"
+
+enum class LTOBitcodeEmbedding {
+ DoNotEmbed = 0,
+ EmbedOptimized = 1,
+ EmbedPostMergePreOptimized = 2
+};
+
+static cl::opt<LTOBitcodeEmbedding> EmbedBitcode(
+ "lto-embed-bitcode", cl::init(LTOBitcodeEmbedding::DoNotEmbed),
+ cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none",
+ "Do not embed"),
+ clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized",
+ "Embed after all optimization passes"),
+ clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized,
+ "post-merge-pre-opt",
+ "Embed post merge, but before optimizations")),
+ cl::desc("Embed LLVM bitcode in object files produced by LTO"));
+
+static cl::opt<bool> ThinLTOAssumeMerged(
+ "thinlto-assume-merged", cl::init(false),
+ cl::desc("Assume the input has already undergone ThinLTO function "
+ "importing and the other pre-optimization pipeline changes."));
+
LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
errs() << "failed to open " << Path << ": " << Msg << '\n';
errs().flush();
@@ -152,9 +176,7 @@ static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins,
}
}
-namespace {
-
-std::unique_ptr<TargetMachine>
+static std::unique_ptr<TargetMachine>
createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
StringRef TheTriple = M.getTargetTriple();
SubtargetFeatures Features;
@@ -175,9 +197,11 @@ createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) {
else
CodeModel = M.getCodeModel();
- return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
+ std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
CodeModel, Conf.CGOptLevel));
+ assert(TM && "Failed to create target machine");
+ return TM;
}
static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
@@ -197,9 +221,9 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
}
PassInstrumentationCallbacks PIC;
- StandardInstrumentations SI;
+ StandardInstrumentations SI(Conf.DebugPassManager);
SI.registerCallbacks(PIC);
- PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC);
+ PassBuilder PB(Conf.DebugPassManager, TM, Conf.PTO, PGOOpt, &PIC);
AAManager AA;
// Parse a custom AA pipeline if asked to.
@@ -213,6 +237,12 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
CGSCCAnalysisManager CGAM(Conf.DebugPassManager);
ModuleAnalysisManager MAM(Conf.DebugPassManager);
+ std::unique_ptr<TargetLibraryInfoImpl> TLII(
+ new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
+ if (Conf.Freestanding)
+ TLII->disableAllFunctions();
+ FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] { return std::move(AA); });
@@ -224,7 +254,9 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
ModulePassManager MPM(Conf.DebugPassManager);
- // FIXME (davide): verify the input.
+
+ if (!Conf.DisableVerify)
+ MPM.addPass(VerifierPass());
PassBuilder::OptimizationLevel OL;
@@ -246,20 +278,21 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
}
if (IsThinLTO)
- MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
- ImportSummary);
+ MPM.addPass(PB.buildThinLTODefaultPipeline(OL, ImportSummary));
else
- MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary);
- MPM.run(Mod, MAM);
+ MPM.addPass(PB.buildLTODefaultPipeline(OL, ExportSummary));
- // FIXME (davide): verify the output.
+ if (!Conf.DisableVerify)
+ MPM.addPass(VerifierPass());
+
+ MPM.run(Mod, MAM);
}
static void runNewPMCustomPasses(const Config &Conf, Module &Mod,
TargetMachine *TM, std::string PipelineDesc,
std::string AAPipelineDesc,
bool DisableVerify) {
- PassBuilder PB(TM);
+ PassBuilder PB(Conf.DebugPassManager, TM);
AAManager AA;
// Parse a custom AA pipeline if asked to.
@@ -275,6 +308,12 @@ static void runNewPMCustomPasses(const Config &Conf, Module &Mod,
CGSCCAnalysisManager CGAM;
ModuleAnalysisManager MAM;
+ std::unique_ptr<TargetLibraryInfoImpl> TLII(
+ new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())));
+ if (Conf.Freestanding)
+ TLII->disableAllFunctions();
+ FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] { return std::move(AA); });
@@ -308,6 +347,8 @@ static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PassManagerBuilder PMB;
PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+ if (Conf.Freestanding)
+ PMB.LibraryInfo->disableAllFunctions();
PMB.Inliner = createFunctionInliningPass();
PMB.ExportSummary = ExportSummary;
PMB.ImportSummary = ImportSummary;
@@ -331,9 +372,27 @@ static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
passes.run(Mod);
}
-bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
- bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
- const ModuleSummaryIndex *ImportSummary) {
+bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
+ bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary,
+ const std::vector<uint8_t> &CmdArgs) {
+ if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) {
+ // FIXME: the motivation for capturing post-merge bitcode and command line
+ // is replicating the compilation environment from bitcode, without needing
+ // to understand the dependencies (the functions to be imported). This
+ // assumes a clang - based invocation, case in which we have the command
+ // line.
+ // It's not very clear how the above motivation would map in the
+ // linker-based case, so we currently don't plumb the command line args in
+ // that case.
+ if (CmdArgs.empty())
+ LLVM_DEBUG(
+ dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but "
+ "command line arguments are not available");
+ llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ /*EmbedBitcode*/ true, /*EmbedCmdline*/ true,
+ /*Cmdline*/ CmdArgs);
+ }
// FIXME: Plumb the combined index into the new pass manager.
if (!Conf.OptPipeline.empty())
runNewPMCustomPasses(Conf, Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
@@ -346,30 +405,17 @@ bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
}
-static cl::opt<bool> EmbedBitcode(
- "lto-embed-bitcode", cl::init(false),
- cl::desc("Embed LLVM bitcode in object files produced by LTO"));
-
-static void EmitBitcodeSection(Module &M, const Config &Conf) {
- if (!EmbedBitcode)
- return;
- SmallVector<char, 0> Buffer;
- raw_svector_ostream OS(Buffer);
- WriteBitcodeToFile(M, OS);
-
- std::unique_ptr<MemoryBuffer> Buf(
- new SmallVectorMemoryBuffer(std::move(Buffer)));
- llvm::EmbedBitcodeInModule(M, Buf->getMemBufferRef(), /*EmbedBitcode*/ true,
- /*EmbedMarker*/ false, /*CmdArgs*/ nullptr);
-}
-
-void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
- unsigned Task, Module &Mod,
- const ModuleSummaryIndex &CombinedIndex) {
+static void codegen(const Config &Conf, TargetMachine *TM,
+ AddStreamFn AddStream, unsigned Task, Module &Mod,
+ const ModuleSummaryIndex &CombinedIndex) {
if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
return;
- EmitBitcodeSection(Mod, Conf);
+ if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized)
+ llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(),
+ /*EmbedBitcode*/ true,
+ /*EmbedCmdline*/ false,
+ /*CmdArgs*/ std::vector<uint8_t>());
std::unique_ptr<ToolOutputFile> DwoOut;
SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
@@ -396,6 +442,8 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
legacy::PassManager CodeGenPasses;
CodeGenPasses.add(
createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex));
+ if (Conf.PreCodeGenPassesHook)
+ Conf.PreCodeGenPassesHook(CodeGenPasses);
if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
DwoOut ? &DwoOut->os() : nullptr,
Conf.CGFileType))
@@ -406,10 +454,11 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
DwoOut->keep();
}
-void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream,
- unsigned ParallelCodeGenParallelismLevel,
- std::unique_ptr<Module> Mod,
- const ModuleSummaryIndex &CombinedIndex) {
+static void splitCodeGen(const Config &C, TargetMachine *TM,
+ AddStreamFn AddStream,
+ unsigned ParallelCodeGenParallelismLevel,
+ std::unique_ptr<Module> Mod,
+ const ModuleSummaryIndex &CombinedIndex) {
ThreadPool CodegenThreadPool(
heavyweight_hardware_concurrency(ParallelCodeGenParallelismLevel));
unsigned ThreadCount = 0;
@@ -457,7 +506,8 @@ void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream,
CodegenThreadPool.wait();
}
-Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) {
+static Expected<const Target *> initAndLookupTarget(const Config &C,
+ Module &Mod) {
if (!C.OverrideTriple.empty())
Mod.setTargetTriple(C.OverrideTriple);
else if (Mod.getTargetTriple().empty())
@@ -469,7 +519,6 @@ Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) {
return make_error<StringError>(Msg, inconvertibleErrorCode());
return T;
}
-}
Error lto::finalizeOptimizationRemarks(
std::unique_ptr<ToolOutputFile> DiagOutputFile) {
@@ -494,7 +543,8 @@ Error lto::backend(const Config &C, AddStreamFn AddStream,
if (!C.CodeGenOnly) {
if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
- /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
+ /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
+ /*CmdArgs*/ std::vector<uint8_t>()))
return Error::success();
}
@@ -532,7 +582,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
Module &Mod, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap,
+ const std::vector<uint8_t> &CmdArgs) {
Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
if (!TOrErr)
return TOrErr.takeError();
@@ -542,7 +593,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
Mod.getContext(), Conf.RemarksFilename, Conf.RemarksPasses,
- Conf.RemarksFormat, Conf.RemarksWithHotness, Task);
+ Conf.RemarksFormat, Conf.RemarksWithHotness, Conf.RemarksHotnessThreshold,
+ Task);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
@@ -559,6 +611,21 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ auto OptimizeAndCodegen =
+ [&](Module &Mod, TargetMachine *TM,
+ std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) {
+ if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
+ /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
+ CmdArgs))
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+
+ codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ };
+
+ if (ThinLTOAssumeMerged)
+ return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile));
+
// When linking an ELF shared object, dso_local should be dropped. We
// conservatively do this for -fpic.
bool ClearDSOLocalOnDeclarations =
@@ -599,10 +666,81 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
- if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
- /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
- return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile));
+}
+
+BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) {
+ if (ThinLTOAssumeMerged && BMs.size() == 1)
+ return BMs.begin();
- codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
- return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+ for (BitcodeModule &BM : BMs) {
+ Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo();
+ if (LTOInfo && LTOInfo->IsThinLTO)
+ return &BM;
+ }
+ return nullptr;
+}
+
+Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) {
+ Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef);
+ if (!BMsOrErr)
+ return BMsOrErr.takeError();
+
+ // The bitcode file may contain multiple modules, we want the one that is
+ // marked as being the ThinLTO module.
+ if (const BitcodeModule *Bm = lto::findThinLTOModule(*BMsOrErr))
+ return *Bm;
+
+ return make_error<StringError>("Could not find module summary",
+ inconvertibleErrorCode());
+}
+
+bool lto::loadReferencedModules(
+ const Module &M, const ModuleSummaryIndex &CombinedIndex,
+ FunctionImporter::ImportMapTy &ImportList,
+ MapVector<llvm::StringRef, llvm::BitcodeModule> &ModuleMap,
+ std::vector<std::unique_ptr<llvm::MemoryBuffer>>
+ &OwnedImportsLifetimeManager) {
+ if (ThinLTOAssumeMerged)
+ return true;
+ // We can simply import the values mentioned in the combined index, since
+ // we should only invoke this using the individual indexes written out
+ // via a WriteIndexesThinBackend.
+ for (const auto &GlobalList : CombinedIndex) {
+ // Ignore entries for undefined references.
+ if (GlobalList.second.SummaryList.empty())
+ continue;
+
+ auto GUID = GlobalList.first;
+ for (const auto &Summary : GlobalList.second.SummaryList) {
+ // Skip the summaries for the importing module. These are included to
+ // e.g. record required linkage changes.
+ if (Summary->modulePath() == M.getModuleIdentifier())
+ continue;
+ // Add an entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()].insert(GUID);
+ }
+ }
+
+ for (auto &I : ImportList) {
+ ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr =
+ llvm::MemoryBuffer::getFile(I.first());
+ if (!MBOrErr) {
+ errs() << "Error loading imported file '" << I.first()
+ << "': " << MBOrErr.getError().message() << "\n";
+ return false;
+ }
+
+ Expected<BitcodeModule> BMOrErr = findThinLTOModule(**MBOrErr);
+ if (!BMOrErr) {
+ handleAllErrors(BMOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+ errs() << "Error loading imported file '" << I.first()
+ << "': " << EIB.message() << '\n';
+ });
+ return false;
+ }
+ ModuleMap.insert({I.first(), *BMOrErr});
+ OwnedImportsLifetimeManager.push_back(std::move(*MBOrErr));
+ }
+ return true;
}
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 25ab1404b4e1..027e197e1e0d 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -43,6 +43,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -87,6 +88,14 @@ cl::opt<bool> RemarksWithHotness(
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
+cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
+ RemarksHotnessThreshold(
+ "lto-pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for an "
+ "optimization remark to be output."
+ " Use 'auto' to apply the threshold from profile summary."),
+ cl::value_desc("uint or 'auto'"), cl::init(0), cl::Hidden);
+
cl::opt<std::string>
RemarksFilename("lto-pass-remarks-output",
cl::desc("Output filename for pass remarks"),
@@ -317,22 +326,15 @@ LTOCodeGenerator::compileOptimized() {
return std::move(*BufferOrErr);
}
-bool LTOCodeGenerator::compile_to_file(const char **Name, bool DisableVerify,
- bool DisableInline,
- bool DisableGVNLoadPRE,
- bool DisableVectorization) {
- if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
- DisableVectorization))
+bool LTOCodeGenerator::compile_to_file(const char **Name) {
+ if (!optimize())
return false;
return compileOptimizedToFile(Name);
}
-std::unique_ptr<MemoryBuffer>
-LTOCodeGenerator::compile(bool DisableVerify, bool DisableInline,
- bool DisableGVNLoadPRE, bool DisableVectorization) {
- if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
- DisableVectorization))
+std::unique_ptr<MemoryBuffer> LTOCodeGenerator::compile() {
+ if (!optimize())
return nullptr;
return compileOptimized();
@@ -359,7 +361,7 @@ bool LTOCodeGenerator::determineTarget() {
// Construct LTOModule, hand over ownership of module and target. Use MAttr as
// the default set of features.
- SubtargetFeatures Features(MAttr);
+ SubtargetFeatures Features(join(MAttrs, ""));
Features.getDefaultSubtargetFeatures(Triple);
FeatureStr = Features.getString();
// Set a default CPU for Darwin triples.
@@ -368,16 +370,21 @@ bool LTOCodeGenerator::determineTarget() {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
+ else if (Triple.isArm64e())
+ MCpu = "apple-a12";
else if (Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)
MCpu = "cyclone";
}
TargetMach = createTargetMachine();
+ assert(TargetMach && "Unable to create target machine");
+
return true;
}
std::unique_ptr<TargetMachine> LTOCodeGenerator::createTargetMachine() {
+ assert(MArch && "MArch is not set!");
return std::unique_ptr<TargetMachine>(MArch->createTargetMachine(
TripleStr, MCpu, FeatureStr, Options, RelocModel, None, CGOptLevel));
}
@@ -466,8 +473,6 @@ void LTOCodeGenerator::applyScopeRestrictions() {
internalizeModule(*MergedModule, mustPreserveGV);
- MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
-
ScopeRestrictionsDone = true;
}
@@ -522,15 +527,13 @@ void LTOCodeGenerator::finishOptimizationRemarks() {
}
/// Optimize merged modules using various IPO passes
-bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
- bool DisableGVNLoadPRE,
- bool DisableVectorization) {
+bool LTOCodeGenerator::optimize() {
if (!this->determineTarget())
return false;
- auto DiagFileOrErr =
- lto::setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
- RemarksFormat, RemarksWithHotness);
+ auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
+ Context, RemarksFilename, RemarksPasses, RemarksFormat,
+ RemarksWithHotness, RemarksHotnessThreshold);
if (!DiagFileOrErr) {
errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
report_fatal_error("Can't get an output file for the remarks");
@@ -559,6 +562,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
// Mark which symbols can not be internalized
this->applyScopeRestrictions();
+ // Write LTOPostLink flag for passes that require all the modules.
+ MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
+
// Instantiate the pass manager to organize the passes.
legacy::PassManager passes;
@@ -570,11 +576,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
Triple TargetTriple(TargetMach->getTargetTriple());
PassManagerBuilder PMB;
- PMB.DisableGVNLoadPRE = DisableGVNLoadPRE;
- PMB.LoopVectorize = !DisableVectorization;
- PMB.SLPVectorize = !DisableVectorization;
- if (!DisableInline)
- PMB.Inliner = createFunctionInliningPass();
+ PMB.LoopVectorize = true;
+ PMB.SLPVectorize = true;
+ PMB.Inliner = createFunctionInliningPass();
PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple);
if (Freestanding)
PMB.LibraryInfo->disableAllFunctions();
diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp
index ebe779aea62e..1119622578df 100644
--- a/llvm/lib/LTO/LTOModule.cpp
+++ b/llvm/lib/LTO/LTOModule.cpp
@@ -46,6 +46,7 @@ using namespace llvm::object;
LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef,
llvm::TargetMachine *TM)
: Mod(std::move(M)), MBRef(MBRef), _target(TM) {
+ assert(_target && "target machine is null");
SymTab.addModule(Mod.get());
}
@@ -221,6 +222,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
+ else if (Triple.isArm64e())
+ CPU = "apple-a12";
else if (Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)
CPU = "cyclone";
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index d0a1e1889c61..38f49693b62e 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -37,6 +37,7 @@
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
@@ -75,6 +76,8 @@ extern cl::opt<bool> LTODiscardValueNames;
extern cl::opt<std::string> RemarksFilename;
extern cl::opt<std::string> RemarksPasses;
extern cl::opt<bool> RemarksWithHotness;
+extern cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
+ RemarksHotnessThreshold;
extern cl::opt<std::string> RemarksFormat;
}
@@ -269,16 +272,26 @@ addUsedSymbolToPreservedGUID(const lto::InputFile &File,
}
// Convert the PreservedSymbols map from "Name" based to "GUID" based.
+static void computeGUIDPreservedSymbols(const lto::InputFile &File,
+ const StringSet<> &PreservedSymbols,
+ const Triple &TheTriple,
+ DenseSet<GlobalValue::GUID> &GUIDs) {
+ // Iterate the symbols in the input file and if the input has preserved symbol
+ // compute the GUID for the symbol.
+ for (const auto &Sym : File.symbols()) {
+ if (PreservedSymbols.count(Sym.getName()) && !Sym.getIRName().empty())
+ GUIDs.insert(GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
+ Sym.getIRName(), GlobalValue::ExternalLinkage, "")));
+ }
+}
+
static DenseSet<GlobalValue::GUID>
-computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
+computeGUIDPreservedSymbols(const lto::InputFile &File,
+ const StringSet<> &PreservedSymbols,
const Triple &TheTriple) {
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
- for (auto &Entry : PreservedSymbols) {
- StringRef Name = Entry.first();
- if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
- Name = Name.drop_front();
- GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
- }
+ computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple,
+ GUIDPreservedSymbols);
return GUIDPreservedSymbols;
}
@@ -565,9 +578,12 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
Features.getDefaultSubtargetFeatures(TheTriple);
std::string FeatureStr = Features.getString();
- return std::unique_ptr<TargetMachine>(
+ std::unique_ptr<TargetMachine> TM(
TheTarget->createTargetMachine(TheTriple.str(), MCpu, FeatureStr, Options,
RelocModel, None, CGOptLevel));
+ assert(TM && "Cannot create target machine");
+
+ return TM;
}
/**
@@ -652,7 +668,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Add used symbol to the preserved symbols.
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -702,7 +718,7 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -737,7 +753,7 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -770,7 +786,7 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
- PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -808,7 +824,7 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
// Convert the preserved symbols set from string to GUID
auto GUIDPreservedSymbols =
- computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ computeGUIDPreservedSymbols(File, PreservedSymbols, TMBuilder.TheTriple);
addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
@@ -972,8 +988,10 @@ void ThinLTOCodeGenerator::run() {
// Convert the preserved symbols set from string to GUID, this is needed for
// computing the caching hash and the internalization.
- auto GUIDPreservedSymbols =
- computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+ for (const auto &M : Modules)
+ computeGUIDPreservedSymbols(*M, PreservedSymbols, TMBuilder.TheTriple,
+ GUIDPreservedSymbols);
// Add used symbol from inputs to the preserved symbols.
for (const auto &M : Modules)
@@ -1042,19 +1060,11 @@ void ThinLTOCodeGenerator::run() {
ModuleToDefinedGVSummaries[ModuleIdentifier];
}
- // Compute the ordering we will process the inputs: the rough heuristic here
- // is to sort them per size so that the largest module get schedule as soon as
- // possible. This is purely a compile-time optimization.
- std::vector<int> ModulesOrdering;
- ModulesOrdering.resize(Modules.size());
- std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
- llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
- auto LSize =
- Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
- auto RSize =
- Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
- return LSize > RSize;
- });
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(Modules.size());
+ for (auto &Mod : Modules)
+ ModulesVec.push_back(&Mod->getSingleBitcodeModule());
+ std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
// Parallel optimizer + codegen
{
@@ -1097,7 +1107,7 @@ void ThinLTOCodeGenerator::run() {
Context.enableDebugTypeODRUniquing();
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
Context, RemarksFilename, RemarksPasses, RemarksFormat,
- RemarksWithHotness, count);
+ RemarksWithHotness, RemarksHotnessThreshold, count);
if (!DiagFileOrErr) {
errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
report_fatal_error("ThinLTO: Can't get an output file for the "