diff options
Diffstat (limited to 'llvm/lib/LTO')
-rw-r--r-- | llvm/lib/LTO/Caching.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/LTO/LTO.cpp | 75 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOBackend.cpp | 240 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOCodeGenerator.cpp | 54 | ||||
-rw-r--r-- | llvm/lib/LTO/LTOModule.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 68 |
6 files changed, 322 insertions, 119 deletions
diff --git a/llvm/lib/LTO/Caching.cpp b/llvm/lib/LTO/Caching.cpp index 46cac3fb1830..75a89e729f43 100644 --- a/llvm/lib/LTO/Caching.cpp +++ b/llvm/lib/LTO/Caching.cpp @@ -13,6 +13,7 @@ #include "llvm/LTO/Caching.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 6e1e3998e490..9103d11059e0 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -798,7 +798,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, for (GlobalValue *GV : Mod.Keep) { if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) { if (Function *F = dyn_cast<Function>(GV)) { - OptimizationRemarkEmitter ORE(F); + OptimizationRemarkEmitter ORE(F, nullptr); ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F) << ore::NV("Function", F) << " not added to the combined module "); @@ -983,7 +983,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) { // Setup optimization remarks. auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename, - Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness); + Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness, + Conf.RemarksHotnessThreshold); if (!DiagFileOrErr) return DiagFileOrErr.takeError(); @@ -1107,6 +1108,7 @@ public: const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, MapVector<StringRef, BitcodeModule> &ModuleMap) = 0; virtual Error wait() = 0; + virtual unsigned getThreadCount() = 0; }; namespace { @@ -1221,6 +1223,10 @@ public: else return Error::success(); } + + unsigned getThreadCount() override { + return BackendThreadPool.getThreadCount(); + } }; } // end anonymous namespace @@ -1309,6 +1315,10 @@ public: } Error wait() override { return Error::success(); } + + // WriteIndexesThinBackend should always return 1 to prevent module + // re-ordering and avoid non-determinism in the final link. + unsigned getThreadCount() override { return 1; } }; } // end anonymous namespace @@ -1443,23 +1453,44 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, auto &ModuleMap = ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; - // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined - // module and parallel code generation partitions. - unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel; - for (auto &Mod : ModuleMap) { - if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], - ExportLists[Mod.first], - ResolvedODR[Mod.first], ThinLTO.ModuleMap)) - return E; - ++Task; - } + auto ProcessOneModule = [&](int I) -> Error { + auto &Mod = *(ModuleMap.begin() + I); + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for + // combined module and parallel code generation partitions. + return BackendProc->start(RegularLTO.ParallelCodeGenParallelismLevel + I, + Mod.second, ImportLists[Mod.first], + ExportLists[Mod.first], ResolvedODR[Mod.first], + ThinLTO.ModuleMap); + }; + if (BackendProc->getThreadCount() == 1) { + // Process the modules in the order they were provided on the command-line. + // It is important for this codepath to be used for WriteIndexesThinBackend, + // to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same + // order as the inputs, which otherwise would affect the final link order. + for (int I = 0, E = ModuleMap.size(); I != E; ++I) + if (Error E = ProcessOneModule(I)) + return E; + } else { + // When executing in parallel, process largest bitsize modules first to + // improve parallelism, and avoid starving the thread pool near the end. + // This saves about 15 sec on a 36-core machine while link `clang.exe` (out + // of 100 sec). + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(ModuleMap.size()); + for (auto &Mod : ModuleMap) + ModulesVec.push_back(&Mod.second); + for (int I : generateModulesOrdering(ModulesVec)) + if (Error E = ProcessOneModule(I)) + return E; + } return BackendProc->wait(); } Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks( LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, - StringRef RemarksFormat, bool RemarksWithHotness, int Count) { + StringRef RemarksFormat, bool RemarksWithHotness, + Optional<uint64_t> RemarksHotnessThreshold, int Count) { std::string Filename = std::string(RemarksFilename); // For ThinLTO, file.opt.<format> becomes // file.opt.<format>.thin.<num>.<format>. @@ -1469,7 +1500,8 @@ Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks( .str(); auto ResultOrErr = llvm::setupLLVMOptimizationRemarks( - Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness); + Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness, + RemarksHotnessThreshold); if (Error E = ResultOrErr.takeError()) return std::move(E); @@ -1495,3 +1527,18 @@ lto::setupStatsFile(StringRef StatsFilename) { StatsFile->keep(); return std::move(StatsFile); } + +// Compute the ordering we will process the inputs: the rough heuristic here +// is to sort them per size so that the largest module get schedule as soon as +// possible. This is purely a compile-time optimization. +std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) { + std::vector<int> ModulesOrdering; + ModulesOrdering.resize(R.size()); + std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); + llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) { + auto LSize = R[LeftIndex]->getBuffer().size(); + auto RSize = R[RightIndex]->getBuffer().size(); + return LSize > RSize; + }); + return ModulesOrdering; +} diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 0c395f9bbf28..1796d6ba60cc 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -50,6 +50,30 @@ using namespace llvm; using namespace lto; +#define DEBUG_TYPE "lto-backend" + +enum class LTOBitcodeEmbedding { + DoNotEmbed = 0, + EmbedOptimized = 1, + EmbedPostMergePreOptimized = 2 +}; + +static cl::opt<LTOBitcodeEmbedding> EmbedBitcode( + "lto-embed-bitcode", cl::init(LTOBitcodeEmbedding::DoNotEmbed), + cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none", + "Do not embed"), + clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized", + "Embed after all optimization passes"), + clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized, + "post-merge-pre-opt", + "Embed post merge, but before optimizations")), + cl::desc("Embed LLVM bitcode in object files produced by LTO")); + +static cl::opt<bool> ThinLTOAssumeMerged( + "thinlto-assume-merged", cl::init(false), + cl::desc("Assume the input has already undergone ThinLTO function " + "importing and the other pre-optimization pipeline changes.")); + LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { errs() << "failed to open " << Path << ": " << Msg << '\n'; errs().flush(); @@ -152,9 +176,7 @@ static void RegisterPassPlugins(ArrayRef<std::string> PassPlugins, } } -namespace { - -std::unique_ptr<TargetMachine> +static std::unique_ptr<TargetMachine> createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { StringRef TheTriple = M.getTargetTriple(); SubtargetFeatures Features; @@ -175,9 +197,11 @@ createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { else CodeModel = M.getCodeModel(); - return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( + std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel, CodeModel, Conf.CGOptLevel)); + assert(TM && "Failed to create target machine"); + return TM; } static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, @@ -197,9 +221,9 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, } PassInstrumentationCallbacks PIC; - StandardInstrumentations SI; + StandardInstrumentations SI(Conf.DebugPassManager); SI.registerCallbacks(PIC); - PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); + PassBuilder PB(Conf.DebugPassManager, TM, Conf.PTO, PGOOpt, &PIC); AAManager AA; // Parse a custom AA pipeline if asked to. @@ -213,6 +237,12 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, CGSCCAnalysisManager CGAM(Conf.DebugPassManager); ModuleAnalysisManager MAM(Conf.DebugPassManager); + std::unique_ptr<TargetLibraryInfoImpl> TLII( + new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()))); + if (Conf.Freestanding) + TLII->disableAllFunctions(); + FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); + // Register the AA manager first so that our version is the one used. FAM.registerPass([&] { return std::move(AA); }); @@ -224,7 +254,9 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); ModulePassManager MPM(Conf.DebugPassManager); - // FIXME (davide): verify the input. + + if (!Conf.DisableVerify) + MPM.addPass(VerifierPass()); PassBuilder::OptimizationLevel OL; @@ -246,20 +278,21 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, } if (IsThinLTO) - MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager, - ImportSummary); + MPM.addPass(PB.buildThinLTODefaultPipeline(OL, ImportSummary)); else - MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary); - MPM.run(Mod, MAM); + MPM.addPass(PB.buildLTODefaultPipeline(OL, ExportSummary)); - // FIXME (davide): verify the output. + if (!Conf.DisableVerify) + MPM.addPass(VerifierPass()); + + MPM.run(Mod, MAM); } static void runNewPMCustomPasses(const Config &Conf, Module &Mod, TargetMachine *TM, std::string PipelineDesc, std::string AAPipelineDesc, bool DisableVerify) { - PassBuilder PB(TM); + PassBuilder PB(Conf.DebugPassManager, TM); AAManager AA; // Parse a custom AA pipeline if asked to. @@ -275,6 +308,12 @@ static void runNewPMCustomPasses(const Config &Conf, Module &Mod, CGSCCAnalysisManager CGAM; ModuleAnalysisManager MAM; + std::unique_ptr<TargetLibraryInfoImpl> TLII( + new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()))); + if (Conf.Freestanding) + TLII->disableAllFunctions(); + FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); + // Register the AA manager first so that our version is the one used. FAM.registerPass([&] { return std::move(AA); }); @@ -308,6 +347,8 @@ static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, PassManagerBuilder PMB; PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())); + if (Conf.Freestanding) + PMB.LibraryInfo->disableAllFunctions(); PMB.Inliner = createFunctionInliningPass(); PMB.ExportSummary = ExportSummary; PMB.ImportSummary = ImportSummary; @@ -331,9 +372,27 @@ static void runOldPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, passes.run(Mod); } -bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, - bool IsThinLTO, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { +bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, + bool IsThinLTO, ModuleSummaryIndex *ExportSummary, + const ModuleSummaryIndex *ImportSummary, + const std::vector<uint8_t> &CmdArgs) { + if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) { + // FIXME: the motivation for capturing post-merge bitcode and command line + // is replicating the compilation environment from bitcode, without needing + // to understand the dependencies (the functions to be imported). This + // assumes a clang - based invocation, case in which we have the command + // line. + // It's not very clear how the above motivation would map in the + // linker-based case, so we currently don't plumb the command line args in + // that case. + if (CmdArgs.empty()) + LLVM_DEBUG( + dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " + "command line arguments are not available"); + llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(), + /*EmbedBitcode*/ true, /*EmbedCmdline*/ true, + /*Cmdline*/ CmdArgs); + } // FIXME: Plumb the combined index into the new pass manager. if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Conf, Mod, TM, Conf.OptPipeline, Conf.AAPipeline, @@ -346,30 +405,17 @@ bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); } -static cl::opt<bool> EmbedBitcode( - "lto-embed-bitcode", cl::init(false), - cl::desc("Embed LLVM bitcode in object files produced by LTO")); - -static void EmitBitcodeSection(Module &M, const Config &Conf) { - if (!EmbedBitcode) - return; - SmallVector<char, 0> Buffer; - raw_svector_ostream OS(Buffer); - WriteBitcodeToFile(M, OS); - - std::unique_ptr<MemoryBuffer> Buf( - new SmallVectorMemoryBuffer(std::move(Buffer))); - llvm::EmbedBitcodeInModule(M, Buf->getMemBufferRef(), /*EmbedBitcode*/ true, - /*EmbedMarker*/ false, /*CmdArgs*/ nullptr); -} - -void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, - unsigned Task, Module &Mod, - const ModuleSummaryIndex &CombinedIndex) { +static void codegen(const Config &Conf, TargetMachine *TM, + AddStreamFn AddStream, unsigned Task, Module &Mod, + const ModuleSummaryIndex &CombinedIndex) { if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) return; - EmitBitcodeSection(Mod, Conf); + if (EmbedBitcode == LTOBitcodeEmbedding::EmbedOptimized) + llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(), + /*EmbedBitcode*/ true, + /*EmbedCmdline*/ false, + /*CmdArgs*/ std::vector<uint8_t>()); std::unique_ptr<ToolOutputFile> DwoOut; SmallString<1024> DwoFile(Conf.SplitDwarfOutput); @@ -396,6 +442,8 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, legacy::PassManager CodeGenPasses; CodeGenPasses.add( createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); + if (Conf.PreCodeGenPassesHook) + Conf.PreCodeGenPassesHook(CodeGenPasses); if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, DwoOut ? &DwoOut->os() : nullptr, Conf.CGFileType)) @@ -406,10 +454,11 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, DwoOut->keep(); } -void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream, - unsigned ParallelCodeGenParallelismLevel, - std::unique_ptr<Module> Mod, - const ModuleSummaryIndex &CombinedIndex) { +static void splitCodeGen(const Config &C, TargetMachine *TM, + AddStreamFn AddStream, + unsigned ParallelCodeGenParallelismLevel, + std::unique_ptr<Module> Mod, + const ModuleSummaryIndex &CombinedIndex) { ThreadPool CodegenThreadPool( heavyweight_hardware_concurrency(ParallelCodeGenParallelismLevel)); unsigned ThreadCount = 0; @@ -457,7 +506,8 @@ void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream, CodegenThreadPool.wait(); } -Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) { +static Expected<const Target *> initAndLookupTarget(const Config &C, + Module &Mod) { if (!C.OverrideTriple.empty()) Mod.setTargetTriple(C.OverrideTriple); else if (Mod.getTargetTriple().empty()) @@ -469,7 +519,6 @@ Expected<const Target *> initAndLookupTarget(const Config &C, Module &Mod) { return make_error<StringError>(Msg, inconvertibleErrorCode()); return T; } -} Error lto::finalizeOptimizationRemarks( std::unique_ptr<ToolOutputFile> DiagOutputFile) { @@ -494,7 +543,8 @@ Error lto::backend(const Config &C, AddStreamFn AddStream, if (!C.CodeGenOnly) { if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false, - /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) + /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, + /*CmdArgs*/ std::vector<uint8_t>())) return Error::success(); } @@ -532,7 +582,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, Module &Mod, const ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, - MapVector<StringRef, BitcodeModule> &ModuleMap) { + MapVector<StringRef, BitcodeModule> &ModuleMap, + const std::vector<uint8_t> &CmdArgs) { Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod); if (!TOrErr) return TOrErr.takeError(); @@ -542,7 +593,8 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, // Setup optimization remarks. auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( Mod.getContext(), Conf.RemarksFilename, Conf.RemarksPasses, - Conf.RemarksFormat, Conf.RemarksWithHotness, Task); + Conf.RemarksFormat, Conf.RemarksWithHotness, Conf.RemarksHotnessThreshold, + Task); if (!DiagFileOrErr) return DiagFileOrErr.takeError(); auto DiagnosticOutputFile = std::move(*DiagFileOrErr); @@ -559,6 +611,21 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + auto OptimizeAndCodegen = + [&](Module &Mod, TargetMachine *TM, + std::unique_ptr<ToolOutputFile> DiagnosticOutputFile) { + if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, + /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, + CmdArgs)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + + codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + }; + + if (ThinLTOAssumeMerged) + return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); + // When linking an ELF shared object, dso_local should be dropped. We // conservatively do this for -fpic. bool ClearDSOLocalOnDeclarations = @@ -599,10 +666,81 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); - if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true, - /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) - return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + return OptimizeAndCodegen(Mod, TM.get(), std::move(DiagnosticOutputFile)); +} + +BitcodeModule *lto::findThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { + if (ThinLTOAssumeMerged && BMs.size() == 1) + return BMs.begin(); - codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); - return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + for (BitcodeModule &BM : BMs) { + Expected<BitcodeLTOInfo> LTOInfo = BM.getLTOInfo(); + if (LTOInfo && LTOInfo->IsThinLTO) + return &BM; + } + return nullptr; +} + +Expected<BitcodeModule> lto::findThinLTOModule(MemoryBufferRef MBRef) { + Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(MBRef); + if (!BMsOrErr) + return BMsOrErr.takeError(); + + // The bitcode file may contain multiple modules, we want the one that is + // marked as being the ThinLTO module. + if (const BitcodeModule *Bm = lto::findThinLTOModule(*BMsOrErr)) + return *Bm; + + return make_error<StringError>("Could not find module summary", + inconvertibleErrorCode()); +} + +bool lto::loadReferencedModules( + const Module &M, const ModuleSummaryIndex &CombinedIndex, + FunctionImporter::ImportMapTy &ImportList, + MapVector<llvm::StringRef, llvm::BitcodeModule> &ModuleMap, + std::vector<std::unique_ptr<llvm::MemoryBuffer>> + &OwnedImportsLifetimeManager) { + if (ThinLTOAssumeMerged) + return true; + // We can simply import the values mentioned in the combined index, since + // we should only invoke this using the individual indexes written out + // via a WriteIndexesThinBackend. + for (const auto &GlobalList : CombinedIndex) { + // Ignore entries for undefined references. + if (GlobalList.second.SummaryList.empty()) + continue; + + auto GUID = GlobalList.first; + for (const auto &Summary : GlobalList.second.SummaryList) { + // Skip the summaries for the importing module. These are included to + // e.g. record required linkage changes. + if (Summary->modulePath() == M.getModuleIdentifier()) + continue; + // Add an entry to provoke importing by thinBackend. + ImportList[Summary->modulePath()].insert(GUID); + } + } + + for (auto &I : ImportList) { + ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MBOrErr = + llvm::MemoryBuffer::getFile(I.first()); + if (!MBOrErr) { + errs() << "Error loading imported file '" << I.first() + << "': " << MBOrErr.getError().message() << "\n"; + return false; + } + + Expected<BitcodeModule> BMOrErr = findThinLTOModule(**MBOrErr); + if (!BMOrErr) { + handleAllErrors(BMOrErr.takeError(), [&](ErrorInfoBase &EIB) { + errs() << "Error loading imported file '" << I.first() + << "': " << EIB.message() << '\n'; + }); + return false; + } + ModuleMap.insert({I.first(), *BMOrErr}); + OwnedImportsLifetimeManager.push_back(std::move(*MBOrErr)); + } + return true; } diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 25ab1404b4e1..027e197e1e0d 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -43,6 +43,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Remarks/HotnessThresholdParser.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" @@ -87,6 +88,14 @@ cl::opt<bool> RemarksWithHotness( cl::desc("With PGO, include profile count in optimization remarks"), cl::Hidden); +cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser> + RemarksHotnessThreshold( + "lto-pass-remarks-hotness-threshold", + cl::desc("Minimum profile count required for an " + "optimization remark to be output." + " Use 'auto' to apply the threshold from profile summary."), + cl::value_desc("uint or 'auto'"), cl::init(0), cl::Hidden); + cl::opt<std::string> RemarksFilename("lto-pass-remarks-output", cl::desc("Output filename for pass remarks"), @@ -317,22 +326,15 @@ LTOCodeGenerator::compileOptimized() { return std::move(*BufferOrErr); } -bool LTOCodeGenerator::compile_to_file(const char **Name, bool DisableVerify, - bool DisableInline, - bool DisableGVNLoadPRE, - bool DisableVectorization) { - if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, - DisableVectorization)) +bool LTOCodeGenerator::compile_to_file(const char **Name) { + if (!optimize()) return false; return compileOptimizedToFile(Name); } -std::unique_ptr<MemoryBuffer> -LTOCodeGenerator::compile(bool DisableVerify, bool DisableInline, - bool DisableGVNLoadPRE, bool DisableVectorization) { - if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, - DisableVectorization)) +std::unique_ptr<MemoryBuffer> LTOCodeGenerator::compile() { + if (!optimize()) return nullptr; return compileOptimized(); @@ -359,7 +361,7 @@ bool LTOCodeGenerator::determineTarget() { // Construct LTOModule, hand over ownership of module and target. Use MAttr as // the default set of features. - SubtargetFeatures Features(MAttr); + SubtargetFeatures Features(join(MAttrs, "")); Features.getDefaultSubtargetFeatures(Triple); FeatureStr = Features.getString(); // Set a default CPU for Darwin triples. @@ -368,16 +370,21 @@ bool LTOCodeGenerator::determineTarget() { MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; + else if (Triple.isArm64e()) + MCpu = "apple-a12"; else if (Triple.getArch() == llvm::Triple::aarch64 || Triple.getArch() == llvm::Triple::aarch64_32) MCpu = "cyclone"; } TargetMach = createTargetMachine(); + assert(TargetMach && "Unable to create target machine"); + return true; } std::unique_ptr<TargetMachine> LTOCodeGenerator::createTargetMachine() { + assert(MArch && "MArch is not set!"); return std::unique_ptr<TargetMachine>(MArch->createTargetMachine( TripleStr, MCpu, FeatureStr, Options, RelocModel, None, CGOptLevel)); } @@ -466,8 +473,6 @@ void LTOCodeGenerator::applyScopeRestrictions() { internalizeModule(*MergedModule, mustPreserveGV); - MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); - ScopeRestrictionsDone = true; } @@ -522,15 +527,13 @@ void LTOCodeGenerator::finishOptimizationRemarks() { } /// Optimize merged modules using various IPO passes -bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, - bool DisableGVNLoadPRE, - bool DisableVectorization) { +bool LTOCodeGenerator::optimize() { if (!this->determineTarget()) return false; - auto DiagFileOrErr = - lto::setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses, - RemarksFormat, RemarksWithHotness); + auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( + Context, RemarksFilename, RemarksPasses, RemarksFormat, + RemarksWithHotness, RemarksHotnessThreshold); if (!DiagFileOrErr) { errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n"; report_fatal_error("Can't get an output file for the remarks"); @@ -559,6 +562,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, // Mark which symbols can not be internalized this->applyScopeRestrictions(); + // Write LTOPostLink flag for passes that require all the modules. + MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1); + // Instantiate the pass manager to organize the passes. legacy::PassManager passes; @@ -570,11 +576,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, Triple TargetTriple(TargetMach->getTargetTriple()); PassManagerBuilder PMB; - PMB.DisableGVNLoadPRE = DisableGVNLoadPRE; - PMB.LoopVectorize = !DisableVectorization; - PMB.SLPVectorize = !DisableVectorization; - if (!DisableInline) - PMB.Inliner = createFunctionInliningPass(); + PMB.LoopVectorize = true; + PMB.SLPVectorize = true; + PMB.Inliner = createFunctionInliningPass(); PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple); if (Freestanding) PMB.LibraryInfo->disableAllFunctions(); diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp index ebe779aea62e..1119622578df 100644 --- a/llvm/lib/LTO/LTOModule.cpp +++ b/llvm/lib/LTO/LTOModule.cpp @@ -46,6 +46,7 @@ using namespace llvm::object; LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef, llvm::TargetMachine *TM) : Mod(std::move(M)), MBRef(MBRef), _target(TM) { + assert(_target && "target machine is null"); SymTab.addModule(Mod.get()); } @@ -221,6 +222,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; + else if (Triple.isArm64e()) + CPU = "apple-a12"; else if (Triple.getArch() == llvm::Triple::aarch64 || Triple.getArch() == llvm::Triple::aarch64_32) CPU = "cyclone"; diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index d0a1e1889c61..38f49693b62e 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -37,6 +37,7 @@ #include "llvm/LTO/SummaryBasedOptimizations.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" +#include "llvm/Remarks/HotnessThresholdParser.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" @@ -75,6 +76,8 @@ extern cl::opt<bool> LTODiscardValueNames; extern cl::opt<std::string> RemarksFilename; extern cl::opt<std::string> RemarksPasses; extern cl::opt<bool> RemarksWithHotness; +extern cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser> + RemarksHotnessThreshold; extern cl::opt<std::string> RemarksFormat; } @@ -269,16 +272,26 @@ addUsedSymbolToPreservedGUID(const lto::InputFile &File, } // Convert the PreservedSymbols map from "Name" based to "GUID" based. +static void computeGUIDPreservedSymbols(const lto::InputFile &File, + const StringSet<> &PreservedSymbols, + const Triple &TheTriple, + DenseSet<GlobalValue::GUID> &GUIDs) { + // Iterate the symbols in the input file and if the input has preserved symbol + // compute the GUID for the symbol. + for (const auto &Sym : File.symbols()) { + if (PreservedSymbols.count(Sym.getName()) && !Sym.getIRName().empty()) + GUIDs.insert(GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( + Sym.getIRName(), GlobalValue::ExternalLinkage, ""))); + } +} + static DenseSet<GlobalValue::GUID> -computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols, +computeGUIDPreservedSymbols(const lto::InputFile &File, + const StringSet<> &PreservedSymbols, const Triple &TheTriple) { DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); - for (auto &Entry : PreservedSymbols) { - StringRef Name = Entry.first(); - if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_') - Name = Name.drop_front(); - GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name)); - } + computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple, + GUIDPreservedSymbols); return GUIDPreservedSymbols; } @@ -565,9 +578,12 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { Features.getDefaultSubtargetFeatures(TheTriple); std::string FeatureStr = Features.getString(); - return std::unique_ptr<TargetMachine>( + std::unique_ptr<TargetMachine> TM( TheTarget->createTargetMachine(TheTriple.str(), MCpu, FeatureStr, Options, RelocModel, None, CGOptLevel)); + assert(TM && "Cannot create target machine"); + + return TM; } /** @@ -652,7 +668,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index, // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( - PreservedSymbols, Triple(TheModule.getTargetTriple())); + File, PreservedSymbols, Triple(TheModule.getTargetTriple())); // Add used symbol to the preserved symbols. addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); @@ -702,7 +718,7 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( - PreservedSymbols, Triple(TheModule.getTargetTriple())); + File, PreservedSymbols, Triple(TheModule.getTargetTriple())); addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); @@ -737,7 +753,7 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule( // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( - PreservedSymbols, Triple(TheModule.getTargetTriple())); + File, PreservedSymbols, Triple(TheModule.getTargetTriple())); addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); @@ -770,7 +786,7 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName, // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = computeGUIDPreservedSymbols( - PreservedSymbols, Triple(TheModule.getTargetTriple())); + File, PreservedSymbols, Triple(TheModule.getTargetTriple())); addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); @@ -808,7 +824,7 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, // Convert the preserved symbols set from string to GUID auto GUIDPreservedSymbols = - computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + computeGUIDPreservedSymbols(File, PreservedSymbols, TMBuilder.TheTriple); addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols); @@ -972,8 +988,10 @@ void ThinLTOCodeGenerator::run() { // Convert the preserved symbols set from string to GUID, this is needed for // computing the caching hash and the internalization. - auto GUIDPreservedSymbols = - computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); + DenseSet<GlobalValue::GUID> GUIDPreservedSymbols; + for (const auto &M : Modules) + computeGUIDPreservedSymbols(*M, PreservedSymbols, TMBuilder.TheTriple, + GUIDPreservedSymbols); // Add used symbol from inputs to the preserved symbols. for (const auto &M : Modules) @@ -1042,19 +1060,11 @@ void ThinLTOCodeGenerator::run() { ModuleToDefinedGVSummaries[ModuleIdentifier]; } - // Compute the ordering we will process the inputs: the rough heuristic here - // is to sort them per size so that the largest module get schedule as soon as - // possible. This is purely a compile-time optimization. - std::vector<int> ModulesOrdering; - ModulesOrdering.resize(Modules.size()); - std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); - llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) { - auto LSize = - Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size(); - auto RSize = - Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size(); - return LSize > RSize; - }); + std::vector<BitcodeModule *> ModulesVec; + ModulesVec.reserve(Modules.size()); + for (auto &Mod : Modules) + ModulesVec.push_back(&Mod->getSingleBitcodeModule()); + std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec); // Parallel optimizer + codegen { @@ -1097,7 +1107,7 @@ void ThinLTOCodeGenerator::run() { Context.enableDebugTypeODRUniquing(); auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks( Context, RemarksFilename, RemarksPasses, RemarksFormat, - RemarksWithHotness, count); + RemarksWithHotness, RemarksHotnessThreshold, count); if (!DiagFileOrErr) { errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n"; report_fatal_error("ThinLTO: Can't get an output file for the " |