diff options
Diffstat (limited to 'ELF')
48 files changed, 7273 insertions, 6036 deletions
diff --git a/ELF/CMakeLists.txt b/ELF/CMakeLists.txt index 2e9d2b941fd9..41da497abe26 100644 --- a/ELF/CMakeLists.txt +++ b/ELF/CMakeLists.txt @@ -11,16 +11,19 @@ add_lld_library(lldELF DriverUtils.cpp EhFrame.cpp Error.cpp + Filesystem.cpp GdbIndex.cpp ICF.cpp InputFiles.cpp InputSection.cpp LTO.cpp LinkerScript.cpp + MapFile.cpp MarkLive.cpp Mips.cpp OutputSections.cpp Relocations.cpp + ScriptLexer.cpp ScriptParser.cpp Strings.cpp SymbolTable.cpp @@ -53,7 +56,7 @@ add_lld_library(lldELF LINK_LIBS lldConfig lldCore - ${PTHREAD_LIB} + ${LLVM_PTHREAD_LIB} DEPENDS ELFOptionsTableGen diff --git a/ELF/Config.h b/ELF/Config.h index b7706205a5b6..c8eecec7439c 100644 --- a/ELF/Config.h +++ b/ELF/Config.h @@ -13,7 +13,10 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Support/CachePruning.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" #include <vector> @@ -34,14 +37,14 @@ enum ELFKind { // For --build-id. enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid }; -// For --discard-{all,locals,none} and --retain-symbols-file. -enum class DiscardPolicy { Default, All, Locals, RetainFile, None }; +// For --discard-{all,locals,none}. +enum class DiscardPolicy { Default, All, Locals, None }; // For --strip-{all,debug}. enum class StripPolicy { None, All, Debug }; // For --unresolved-symbols. -enum class UnresolvedPolicy { NoUndef, ReportError, Warn, Ignore }; +enum class UnresolvedPolicy { ReportError, Warn, WarnAll, Ignore, IgnoreAll }; // For --sort-section and linkerscript sorting rules. enum class SortSectionPolicy { Default, None, Alignment, Name, Priority }; @@ -58,11 +61,10 @@ struct SymbolVersion { // This struct contains symbols version definition that // can be found in version script if it is used for link. 
struct VersionDefinition { - VersionDefinition(llvm::StringRef Name, uint16_t Id) : Name(Name), Id(Id) {} llvm::StringRef Name; - uint16_t Id; + uint16_t Id = 0; std::vector<SymbolVersion> Globals; - size_t NameOff; // Offset in string table. + size_t NameOff = 0; // Offset in the string table }; // This struct contains the global configuration for the linker. @@ -72,6 +74,7 @@ struct VersionDefinition { struct Configuration { InputFile *FirstElf = nullptr; uint8_t OSABI = 0; + llvm::CachePruningPolicy ThinLTOCachePolicy; llvm::StringMap<uint64_t> SectionStartMap; llvm::StringRef DynamicLinker; llvm::StringRef Entry; @@ -80,10 +83,12 @@ struct Configuration { llvm::StringRef Init; llvm::StringRef LTOAAPipeline; llvm::StringRef LTONewPmPasses; + llvm::StringRef MapFile; llvm::StringRef OutputFile; + llvm::StringRef OptRemarksFilename; llvm::StringRef SoName; llvm::StringRef Sysroot; - llvm::StringSet<> RetainSymbolsFile; + llvm::StringRef ThinLTOCacheDir; std::string RPath; std::vector<VersionDefinition> VersionDefinitions; std::vector<llvm::StringRef> AuxiliaryList; @@ -94,6 +99,7 @@ struct Configuration { std::vector<SymbolVersion> VersionScriptLocals; std::vector<uint8_t> BuildIdVector; bool AllowMultipleDefinition; + bool ArchiveWithoutSymbolsSeen = false; bool AsNeeded = false; bool Bsymbolic; bool BsymbolicFunctions; @@ -102,30 +108,29 @@ struct Configuration { bool Demangle = true; bool DisableVerify; bool EhFrameHdr; + bool EmitRelocs; bool EnableNewDtags; bool ExportDynamic; bool FatalWarnings; bool GcSections; bool GdbIndex; - bool GnuHash = false; + bool GnuHash; bool ICF; - bool Mips64EL = false; bool MipsN32Abi = false; bool NoGnuUnique; bool NoUndefinedVersion; bool Nostdlib; bool OFormatBinary; - bool OMagic; - bool Pic; + bool Omagic; + bool OptRemarksWithHotness; bool Pie; bool PrintGcSections; - bool Rela; bool Relocatable; bool SaveTemps; bool SingleRoRx; bool Shared; bool Static = false; - bool SysvHash = true; + bool SysvHash; bool Target1Rel; 
bool Threads; bool Trace; @@ -134,17 +139,20 @@ struct Configuration { bool WarnMissingEntry; bool ZCombreloc; bool ZExecstack; + bool ZNocopyreloc; bool ZNodelete; + bool ZNodlopen; bool ZNow; bool ZOrigin; bool ZRelro; + bool ZText; bool ExitEarly; bool ZWxneeded; DiscardPolicy Discard; SortSectionPolicy SortSection; - StripPolicy Strip = StripPolicy::None; + StripPolicy Strip; UnresolvedPolicy UnresolvedSymbols; - Target2Policy Target2 = Target2Policy::GotRel; + Target2Policy Target2; BuildIdKind BuildId = BuildIdKind::None; ELFKind EKind = ELFNoneKind; uint16_t DefaultSymbolVersion = llvm::ELF::VER_NDX_GLOBAL; @@ -157,6 +165,58 @@ struct Configuration { unsigned LTOO; unsigned Optimize; unsigned ThinLTOJobs; + + // The following config options do not directly correspond to any + // particular command line options. + + // True if we need to pass through relocations in input files to the + // output file. Usually false because we consume relocations. + bool CopyRelocs; + + // True if the target is ELF64. False if ELF32. + bool Is64; + + // True if the target is little-endian. False if big-endian. + bool IsLE; + + // endianness::little if IsLE is true. endianness::big otherwise. + llvm::support::endianness Endianness; + + // True if the target is the little-endian MIPS64. + // + // The reason why we have this variable only for the MIPS is because + // we use this often. Some ELF headers for MIPS64EL are in a + // mixed-endian (which is horrible and I'd say that's a serious spec + // bug), and we need to know whether we are reading MIPS ELF files or + // not in various places. + // + // (Note that MIPS64EL is not a typo for MIPS64LE. This is the official + // name whatever that means. A fun hypothesis is that "EL" is short for + // little-endian written in the little-endian order, but I don't know + // if that's true.) + bool IsMips64EL; + + // The ELF spec defines two types of relocation table entries, RELA and + // REL.
RELA is a triplet of (offset, info, addend) while REL is a + // tuple of (offset, info). Addends for REL are implicit and read from + // the location where the relocations are applied. So, REL is more + // compact than RELA but requires a bit more work to process. + // + // (From the linker writer's view, this distinction is not necessary. + // If the ELF had chosen whichever and stuck with it, it would have + // been easier to write code to process relocations, but it's too late + // to change the spec.) + // + // Each ABI defines its relocation type. IsRela is true if target + // uses RELA. As far as we know, all 64-bit ABIs are using RELA. A + // few 32-bit ABIs are using RELA too. + bool IsRela; + + // True if we are creating position-independent code. + bool Pic; + + // 4 for ELF32, 8 for ELF64. + int Wordsize; }; // The only instance of Configuration struct. diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp index 50b701175d3e..47ecd607a48f 100644 --- a/ELF/Driver.cpp +++ b/ELF/Driver.cpp @@ -6,15 +6,34 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// The driver drives the entire linking process. It is responsible for +// parsing command line options and doing whatever it is instructed to do. +// +// One notable thing in the LLD's driver when compared to other linkers is +// that the LLD's driver is agnostic on the host operating system. +// Other linkers usually have implicit default values (such as a dynamic +// linker path or library paths) for each host OS. +// +// I don't think implicit default values are useful because they are +// usually explicitly specified by the compiler driver. They can even +// be harmful when you are doing cross-linking. Therefore, in LLD, we +// simply trust the compiler driver to pass all required options and +// don't try to make effort on our side.
+// +//===----------------------------------------------------------------------===// #include "Driver.h" #include "Config.h" #include "Error.h" +#include "Filesystem.h" #include "ICF.h" #include "InputFiles.h" #include "InputSection.h" #include "LinkerScript.h" #include "Memory.h" +#include "OutputSections.h" +#include "ScriptParser.h" #include "Strings.h" #include "SymbolTable.h" #include "Target.h" @@ -48,16 +67,19 @@ BumpPtrAllocator elf::BAlloc; StringSaver elf::Saver{BAlloc}; std::vector<SpecificAllocBase *> elf::SpecificAllocBase::Instances; +static void setConfigs(); + bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Error) { ErrorCount = 0; ErrorOS = &Error; Argv0 = Args[0]; + InputSections.clear(); Tar = nullptr; Config = make<Configuration>(); Driver = make<LinkerDriver>(); - ScriptConfig = make<ScriptConfiguration>(); + Script = make<LinkerScript>(); Driver->main(Args, CanExitEarly); freeArena(); @@ -78,10 +100,8 @@ static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef Emul) { .Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64}) .Case("armelf_linux_eabi", {ELF32LEKind, EM_ARM}) .Case("elf32_x86_64", {ELF32LEKind, EM_X86_64}) - .Case("elf32btsmip", {ELF32BEKind, EM_MIPS}) - .Case("elf32ltsmip", {ELF32LEKind, EM_MIPS}) - .Case("elf32btsmipn32", {ELF32BEKind, EM_MIPS}) - .Case("elf32ltsmipn32", {ELF32LEKind, EM_MIPS}) + .Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS}) + .Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS}) .Case("elf32ppc", {ELF32BEKind, EM_PPC}) .Case("elf64btsmip", {ELF64BEKind, EM_MIPS}) .Case("elf64ltsmip", {ELF64LEKind, EM_MIPS}) @@ -133,7 +153,7 @@ LinkerDriver::getArchiveMembers(MemoryBufferRef MB) { // Opens and parses a file. Path has to be resolved already. // Newly created memory buffers are owned by this driver. 
-void LinkerDriver::addFile(StringRef Path) { +void LinkerDriver::addFile(StringRef Path, bool WithLOption) { using namespace sys::fs; Optional<MemoryBufferRef> Buffer = readFile(Path); @@ -164,6 +184,19 @@ void LinkerDriver::addFile(StringRef Path) { return; } Files.push_back(createSharedFile(MBRef)); + + // DSOs usually have DT_SONAME tags in their ELF headers, and the + // sonames are used to identify DSOs. But if they are missing, + // they are identified by filenames. We don't know whether the new + // file has a DT_SONAME or not because we haven't parsed it yet. + // Here, we set the default soname for the file because we might + // need it later. + // + // If a file was specified by -lfoo, the directory part is not + // significant, as a user did not specify it. This behavior is + // compatible with GNU. + Files.back()->DefaultSoName = + WithLOption ? sys::path::filename(Path) : Path; return; default: if (InLib) @@ -176,7 +209,7 @@ void LinkerDriver::addFile(StringRef Path) { // Add a given library by searching it from input search paths. void LinkerDriver::addLibrary(StringRef Name) { if (Optional<std::string> Path = searchLibrary(Name)) - addFile(*Path); + addFile(*Path, /*WithLOption=*/true); else error("unable to find library -l" + Name); } @@ -281,11 +314,27 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) { return; } - // GNU linkers disagree here. Though both -version and -v are mentioned - // in help to print the version information, GNU ld just normally exits, - // while gold can continue linking. We are compatible with ld.bfd here. - if (Args.hasArg(OPT_version) || Args.hasArg(OPT_v)) - outs() << getLLDVersion() << "\n"; + // Handle -v or -version. + // + // A note about "compatible with GNU linkers" message: this is a hack for + // scripts generated by GNU Libtool 2.4.6 (released in February 2014 and + // still the newest version in March 2017) or earlier to recognize LLD as + // a GNU compatible linker. 
As long as an output for the -v option + // contains "GNU" or "with BFD", they recognize us as GNU-compatible. + // + // This is a somewhat ugly hack, but in reality, we had no choice other + // than doing this. Considering the very long release cycle of Libtool, + // it is not easy to improve it to recognize LLD as a GNU compatible + // linker in a timely manner. Even if we can make it, there are still a + // lot of "configure" scripts out there that are generated by old versions + // of Libtool. We cannot convince every software developer to migrate to + // the latest version and re-generate scripts. So we have this hack. + if (Args.hasArg(OPT_v) || Args.hasArg(OPT_version)) + message(getLLDVersion() + " (compatible with GNU linkers)"); + + // ld.bfd always exits after printing out the version string. + // ld.gold proceeds if a given option is -v. Because gold's behavior + // is more permissive than ld.bfd, we chose what gold does here. if (Args.hasArg(OPT_version)) return; @@ -311,6 +360,7 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) { initLLVM(Args); createFiles(Args); inferMachineType(); + setConfigs(); checkOptions(Args); if (ErrorCount) return; @@ -333,26 +383,68 @@ void LinkerDriver::main(ArrayRef<const char *> ArgsArr, bool CanExitEarly) { } } -static UnresolvedPolicy getUnresolvedSymbolOption(opt::InputArgList &Args) { +static bool getArg(opt::InputArgList &Args, unsigned K1, unsigned K2, + bool Default) { + if (auto *Arg = Args.getLastArg(K1, K2)) + return Arg->getOption().getID() == K1; + return Default; +} + +static std::vector<StringRef> getArgs(opt::InputArgList &Args, int Id) { + std::vector<StringRef> V; + for (auto *Arg : Args.filtered(Id)) + V.push_back(Arg->getValue()); + return V; +} + +static std::string getRPath(opt::InputArgList &Args) { + std::vector<StringRef> V = getArgs(Args, OPT_rpath); + return llvm::join(V.begin(), V.end(), ":"); +} + +// Determines what we should do if there are remaining unresolved +//
symbols after the name resolution. +static UnresolvedPolicy getUnresolvedSymbolPolicy(opt::InputArgList &Args) { + // -noinhibit-exec or -r imply some default values. if (Args.hasArg(OPT_noinhibit_exec)) - return UnresolvedPolicy::Warn; - if (Args.hasArg(OPT_no_undefined) || hasZOption(Args, "defs")) - return UnresolvedPolicy::NoUndef; - if (Config->Relocatable) - return UnresolvedPolicy::Ignore; + return UnresolvedPolicy::WarnAll; + if (Args.hasArg(OPT_relocatable)) + return UnresolvedPolicy::IgnoreAll; - if (auto *Arg = Args.getLastArg(OPT_unresolved_symbols)) { - StringRef S = Arg->getValue(); - if (S == "ignore-all" || S == "ignore-in-object-files") - return UnresolvedPolicy::Ignore; - if (S == "ignore-in-shared-libs" || S == "report-all") - return UnresolvedPolicy::ReportError; - error("unknown --unresolved-symbols value: " + S); + UnresolvedPolicy ErrorOrWarn = getArg(Args, OPT_error_unresolved_symbols, + OPT_warn_unresolved_symbols, true) + ? UnresolvedPolicy::ReportError + : UnresolvedPolicy::Warn; + + // Process the last of -unresolved-symbols, -no-undefined or -z defs. + for (auto *Arg : llvm::reverse(Args)) { + switch (Arg->getOption().getID()) { + case OPT_unresolved_symbols: { + StringRef S = Arg->getValue(); + if (S == "ignore-all" || S == "ignore-in-object-files") + return UnresolvedPolicy::Ignore; + if (S == "ignore-in-shared-libs" || S == "report-all") + return ErrorOrWarn; + error("unknown --unresolved-symbols value: " + S); + continue; + } + case OPT_no_undefined: + return ErrorOrWarn; + case OPT_z: + if (StringRef(Arg->getValue()) == "defs") + return ErrorOrWarn; + continue; + } } - return UnresolvedPolicy::ReportError; + + // -shared implies -unresolved-symbols=ignore-all because missing + // symbols are likely to be resolved at runtime using other DSOs. 
+ if (Config->Shared) + return UnresolvedPolicy::Ignore; + return ErrorOrWarn; } -static Target2Policy getTarget2Option(opt::InputArgList &Args) { +static Target2Policy getTarget2(opt::InputArgList &Args) { if (auto *Arg = Args.getLastArg(OPT_target2)) { StringRef S = Arg->getValue(); if (S == "rel") @@ -376,16 +468,10 @@ static bool isOutputFormatBinary(opt::InputArgList &Args) { return false; } -static bool getArg(opt::InputArgList &Args, unsigned K1, unsigned K2, - bool Default) { - if (auto *Arg = Args.getLastArg(K1, K2)) - return Arg->getOption().getID() == K1; - return Default; -} - -static DiscardPolicy getDiscardOption(opt::InputArgList &Args) { - if (Config->Relocatable) +static DiscardPolicy getDiscard(opt::InputArgList &Args) { + if (Args.hasArg(OPT_relocatable)) return DiscardPolicy::None; + auto *Arg = Args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none); if (!Arg) @@ -397,13 +483,23 @@ static DiscardPolicy getDiscardOption(opt::InputArgList &Args) { return DiscardPolicy::None; } -static StripPolicy getStripOption(opt::InputArgList &Args) { - if (auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug)) { - if (Arg->getOption().getID() == OPT_strip_all) - return StripPolicy::All; - return StripPolicy::Debug; - } - return StripPolicy::None; +static StringRef getDynamicLinker(opt::InputArgList &Args) { + auto *Arg = Args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker); + if (!Arg || Arg->getOption().getID() == OPT_no_dynamic_linker) + return ""; + return Arg->getValue(); +} + +static StripPolicy getStrip(opt::InputArgList &Args) { + if (Args.hasArg(OPT_relocatable)) + return StripPolicy::None; + + auto *Arg = Args.getLastArg(OPT_strip_all, OPT_strip_debug); + if (!Arg) + return StripPolicy::None; + if (Arg->getOption().getID() == OPT_strip_all) + return StripPolicy::All; + return StripPolicy::Debug; } static uint64_t parseSectionAddress(StringRef S, opt::Arg *Arg) { @@ -433,7 +529,7 @@ static StringMap<uint64_t> 
getSectionStartMap(opt::InputArgList &Args) { return Ret; } -static SortSectionPolicy getSortKind(opt::InputArgList &Args) { +static SortSectionPolicy getSortSection(opt::InputArgList &Args) { StringRef S = getString(Args, OPT_sort_section); if (S == "alignment") return SortSectionPolicy::Alignment; @@ -444,6 +540,17 @@ static SortSectionPolicy getSortKind(opt::InputArgList &Args) { return SortSectionPolicy::Default; } +static std::pair<bool, bool> getHashStyle(opt::InputArgList &Args) { + StringRef S = getString(Args, OPT_hash_style, "sysv"); + if (S == "sysv") + return {true, false}; + if (S == "gnu") + return {false, true}; + if (S != "both") + error("unknown -hash-style: " + S); + return {true, true}; +} + static std::vector<StringRef> getLines(MemoryBufferRef MB) { SmallVector<StringRef, 0> Arr; MB.getBuffer().split(Arr, '\n'); @@ -459,116 +566,112 @@ static std::vector<StringRef> getLines(MemoryBufferRef MB) { // Initializes Config members by the command line options. void LinkerDriver::readConfigs(opt::InputArgList &Args) { - for (auto *Arg : Args.filtered(OPT_L)) - Config->SearchPaths.push_back(Arg->getValue()); - - std::vector<StringRef> RPaths; - for (auto *Arg : Args.filtered(OPT_rpath)) - RPaths.push_back(Arg->getValue()); - if (!RPaths.empty()) - Config->RPath = llvm::join(RPaths.begin(), RPaths.end(), ":"); - - if (auto *Arg = Args.getLastArg(OPT_m)) { - // Parse ELF{32,64}{LE,BE} and CPU type. 
- StringRef S = Arg->getValue(); - std::tie(Config->EKind, Config->EMachine, Config->OSABI) = - parseEmulation(S); - Config->MipsN32Abi = (S == "elf32btsmipn32" || S == "elf32ltsmipn32"); - Config->Emulation = S; - } - Config->AllowMultipleDefinition = Args.hasArg(OPT_allow_multiple_definition); + Config->AuxiliaryList = getArgs(Args, OPT_auxiliary); Config->Bsymbolic = Args.hasArg(OPT_Bsymbolic); Config->BsymbolicFunctions = Args.hasArg(OPT_Bsymbolic_functions); + Config->DefineCommon = getArg(Args, OPT_define_common, OPT_no_define_common, + !Args.hasArg(OPT_relocatable)); Config->Demangle = getArg(Args, OPT_demangle, OPT_no_demangle, true); Config->DisableVerify = Args.hasArg(OPT_disable_verify); + Config->Discard = getDiscard(Args); + Config->DynamicLinker = getDynamicLinker(Args); Config->EhFrameHdr = Args.hasArg(OPT_eh_frame_hdr); + Config->EmitRelocs = Args.hasArg(OPT_emit_relocs); Config->EnableNewDtags = !Args.hasArg(OPT_disable_new_dtags); - Config->ExportDynamic = Args.hasArg(OPT_export_dynamic); - Config->FatalWarnings = Args.hasArg(OPT_fatal_warnings); + Config->Entry = getString(Args, OPT_entry); + Config->ExportDynamic = + getArg(Args, OPT_export_dynamic, OPT_no_export_dynamic, false); + Config->FatalWarnings = + getArg(Args, OPT_fatal_warnings, OPT_no_fatal_warnings, false); + Config->Fini = getString(Args, OPT_fini, "_fini"); Config->GcSections = getArg(Args, OPT_gc_sections, OPT_no_gc_sections, false); Config->GdbIndex = Args.hasArg(OPT_gdb_index); Config->ICF = Args.hasArg(OPT_icf); + Config->Init = getString(Args, OPT_init, "_init"); + Config->LTOAAPipeline = getString(Args, OPT_lto_aa_pipeline); + Config->LTONewPmPasses = getString(Args, OPT_lto_newpm_passes); + Config->LTOO = getInteger(Args, OPT_lto_O, 2); + Config->LTOPartitions = getInteger(Args, OPT_lto_partitions, 1); + Config->MapFile = getString(Args, OPT_Map); Config->NoGnuUnique = Args.hasArg(OPT_no_gnu_unique); Config->NoUndefinedVersion = Args.hasArg(OPT_no_undefined_version); 
Config->Nostdlib = Args.hasArg(OPT_nostdlib); - Config->OMagic = Args.hasArg(OPT_omagic); + Config->OFormatBinary = isOutputFormatBinary(Args); + Config->Omagic = Args.hasArg(OPT_omagic); + Config->OptRemarksFilename = getString(Args, OPT_opt_remarks_filename); + Config->OptRemarksWithHotness = Args.hasArg(OPT_opt_remarks_with_hotness); + Config->Optimize = getInteger(Args, OPT_O, 1); + Config->OutputFile = getString(Args, OPT_o); Config->Pie = getArg(Args, OPT_pie, OPT_nopie, false); Config->PrintGcSections = Args.hasArg(OPT_print_gc_sections); + Config->RPath = getRPath(Args); Config->Relocatable = Args.hasArg(OPT_relocatable); - Config->DefineCommon = getArg(Args, OPT_define_common, OPT_no_define_common, - !Config->Relocatable); - Config->Discard = getDiscardOption(Args); Config->SaveTemps = Args.hasArg(OPT_save_temps); - Config->SingleRoRx = Args.hasArg(OPT_no_rosegment); + Config->SearchPaths = getArgs(Args, OPT_L); + Config->SectionStartMap = getSectionStartMap(Args); Config->Shared = Args.hasArg(OPT_shared); + Config->SingleRoRx = Args.hasArg(OPT_no_rosegment); + Config->SoName = getString(Args, OPT_soname); + Config->SortSection = getSortSection(Args); + Config->Strip = getStrip(Args); + Config->Sysroot = getString(Args, OPT_sysroot); Config->Target1Rel = getArg(Args, OPT_target1_rel, OPT_target1_abs, false); + Config->Target2 = getTarget2(Args); + Config->ThinLTOCacheDir = getString(Args, OPT_thinlto_cache_dir); + Config->ThinLTOCachePolicy = + check(parseCachePruningPolicy(getString(Args, OPT_thinlto_cache_policy)), + "--thinlto-cache-policy: invalid cache policy"); + Config->ThinLTOJobs = getInteger(Args, OPT_thinlto_jobs, -1u); Config->Threads = getArg(Args, OPT_threads, OPT_no_threads, true); Config->Trace = Args.hasArg(OPT_trace); + Config->Undefined = getArgs(Args, OPT_undefined); + Config->UnresolvedSymbols = getUnresolvedSymbolPolicy(Args); Config->Verbose = Args.hasArg(OPT_verbose); Config->WarnCommon = Args.hasArg(OPT_warn_common); + 
Config->ZCombreloc = !hasZOption(Args, "nocombreloc"); + Config->ZExecstack = hasZOption(Args, "execstack"); + Config->ZNocopyreloc = hasZOption(Args, "nocopyreloc"); + Config->ZNodelete = hasZOption(Args, "nodelete"); + Config->ZNodlopen = hasZOption(Args, "nodlopen"); + Config->ZNow = hasZOption(Args, "now"); + Config->ZOrigin = hasZOption(Args, "origin"); + Config->ZRelro = !hasZOption(Args, "norelro"); + Config->ZStackSize = getZOptionValue(Args, "stack-size", 0); + Config->ZText = !hasZOption(Args, "notext"); + Config->ZWxneeded = hasZOption(Args, "wxneeded"); - Config->DynamicLinker = getString(Args, OPT_dynamic_linker); - Config->Entry = getString(Args, OPT_entry); - Config->Fini = getString(Args, OPT_fini, "_fini"); - Config->Init = getString(Args, OPT_init, "_init"); - Config->LTOAAPipeline = getString(Args, OPT_lto_aa_pipeline); - Config->LTONewPmPasses = getString(Args, OPT_lto_newpm_passes); - Config->OutputFile = getString(Args, OPT_o); - Config->SoName = getString(Args, OPT_soname); - Config->Sysroot = getString(Args, OPT_sysroot); - - Config->Optimize = getInteger(Args, OPT_O, 1); - Config->LTOO = getInteger(Args, OPT_lto_O, 2); if (Config->LTOO > 3) error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O)); - Config->LTOPartitions = getInteger(Args, OPT_lto_partitions, 1); if (Config->LTOPartitions == 0) error("--lto-partitions: number of threads must be > 0"); - Config->ThinLTOJobs = getInteger(Args, OPT_thinlto_jobs, -1u); if (Config->ThinLTOJobs == 0) error("--thinlto-jobs: number of threads must be > 0"); - Config->ZCombreloc = !hasZOption(Args, "nocombreloc"); - Config->ZExecstack = hasZOption(Args, "execstack"); - Config->ZNodelete = hasZOption(Args, "nodelete"); - Config->ZNow = hasZOption(Args, "now"); - Config->ZOrigin = hasZOption(Args, "origin"); - Config->ZRelro = !hasZOption(Args, "norelro"); - Config->ZStackSize = getZOptionValue(Args, "stack-size", -1); - Config->ZWxneeded = hasZOption(Args, "wxneeded"); + if (auto 
*Arg = Args.getLastArg(OPT_m)) { + // Parse ELF{32,64}{LE,BE} and CPU type. + StringRef S = Arg->getValue(); + std::tie(Config->EKind, Config->EMachine, Config->OSABI) = + parseEmulation(S); + Config->MipsN32Abi = (S == "elf32btsmipn32" || S == "elf32ltsmipn32"); + Config->Emulation = S; + } - Config->OFormatBinary = isOutputFormatBinary(Args); - Config->SectionStartMap = getSectionStartMap(Args); - Config->SortSection = getSortKind(Args); - Config->Target2 = getTarget2Option(Args); - Config->UnresolvedSymbols = getUnresolvedSymbolOption(Args); + if (Args.hasArg(OPT_print_map)) + Config->MapFile = "-"; // --omagic is an option to create old-fashioned executables in which // .text segments are writable. Today, the option is still in use to // create special-purpose programs such as boot loaders. It doesn't // make sense to create PT_GNU_RELRO for such executables. - if (Config->OMagic) + if (Config->Omagic) Config->ZRelro = false; - if (!Config->Relocatable) - Config->Strip = getStripOption(Args); - - // Config->Pic is true if we are generating position-independent code. - Config->Pic = Config->Pie || Config->Shared; - - if (auto *Arg = Args.getLastArg(OPT_hash_style)) { - StringRef S = Arg->getValue(); - if (S == "gnu") { - Config->GnuHash = true; - Config->SysvHash = false; - } else if (S == "both") { - Config->GnuHash = true; - } else if (S != "sysv") - error("unknown hash style: " + S); - } + std::tie(Config->SysvHash, Config->GnuHash) = getHashStyle(Args); - // Parse --build-id or --build-id=<style>. + // Parse --build-id or --build-id=<style>. We handle "tree" as a + // synonym for "sha1" because all of our hash functions including + // -build-id=sha1 are tree hashes for performance reasons. 
if (Args.hasArg(OPT_build_id)) Config->BuildId = BuildIdKind::Fast; if (auto *Arg = Args.getLastArg(OPT_build_id_eq)) { @@ -589,15 +692,10 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { } } - for (auto *Arg : Args.filtered(OPT_auxiliary)) - Config->AuxiliaryList.push_back(Arg->getValue()); if (!Config->Shared && !Config->AuxiliaryList.empty()) error("-f may not be used without -shared"); - for (auto *Arg : Args.filtered(OPT_undefined)) - Config->Undefined.push_back(Arg->getValue()); - - if (auto *Arg = Args.getLastArg(OPT_dynamic_list)) + for (auto *Arg : Args.filtered(OPT_dynamic_list)) if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) readDynamicList(*Buffer); @@ -605,13 +703,14 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) Config->SymbolOrderingFile = getLines(*Buffer); - // If --retain-symbol-file is used, we'll retail only the symbols listed in + // If --retain-symbol-file is used, we'll keep only the symbols listed in // the file and discard all others. 
if (auto *Arg = Args.getLastArg(OPT_retain_symbols_file)) { - Config->Discard = DiscardPolicy::RetainFile; + Config->DefaultSymbolVersion = VER_NDX_LOCAL; if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) for (StringRef S : getLines(*Buffer)) - Config->RetainSymbolsFile.insert(S); + Config->VersionScriptGlobals.push_back( + {S, /*IsExternCpp*/ false, /*HasWildcard*/ false}); } for (auto *Arg : Args.filtered(OPT_export_dynamic_symbol)) @@ -627,11 +726,37 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->DefaultSymbolVersion = VER_NDX_LOCAL; } + if (getArg(Args, OPT_export_dynamic, OPT_no_export_dynamic, false)) + Config->DefaultSymbolVersion = VER_NDX_GLOBAL; + if (auto *Arg = Args.getLastArg(OPT_version_script)) if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue())) readVersionScript(*Buffer); } +// Some Config members do not directly correspond to any particular +// command line options, but are computed based on other Config values. +// This function initializes such members. See Config.h for the details +// of these values. +static void setConfigs() { + ELFKind Kind = Config->EKind; + uint16_t Machine = Config->EMachine; + + // There is an ILP32 ABI for x86-64, although it's not very popular. + // It is called the x32 ABI. + bool IsX32 = (Kind == ELF32LEKind && Machine == EM_X86_64); + + Config->CopyRelocs = (Config->Relocatable || Config->EmitRelocs); + Config->Is64 = (Kind == ELF64LEKind || Kind == ELF64BEKind); + Config->IsLE = (Kind == ELF32LEKind || Kind == ELF64LEKind); + Config->Endianness = + Config->IsLE ? support::endianness::little : support::endianness::big; + Config->IsMips64EL = (Kind == ELF64LEKind && Machine == EM_MIPS); + Config->IsRela = Config->Is64 || IsX32 || Config->MipsN32Abi; + Config->Pic = Config->Pie || Config->Shared; + Config->Wordsize = Config->Is64 ? 8 : 4; +} + // Returns a value of "-format" option.
static bool getBinaryOption(StringRef S) { if (S == "binary") @@ -650,7 +775,7 @@ void LinkerDriver::createFiles(opt::InputArgList &Args) { addLibrary(Arg->getValue()); break; case OPT_INPUT: - addFile(Arg->getValue()); + addFile(Arg->getValue(), /*WithLOption=*/false); break; case OPT_alias_script_T: case OPT_script: @@ -744,12 +869,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { SymbolTable<ELFT> Symtab; elf::Symtab<ELFT>::X = &Symtab; Target = createTarget(); - ScriptBase = Script<ELFT>::X = make<LinkerScript<ELFT>>(); - Config->Rela = - ELFT::Is64Bits || Config->EMachine == EM_X86_64 || Config->MipsN32Abi; - Config->Mips64EL = - (Config->EMachine == EM_MIPS && Config->EKind == ELF64LEKind); Config->MaxPageSize = getMaxPageSize(Args); Config->ImageBase = getImageBase(Args); @@ -757,6 +877,14 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { if (Config->OutputFile.empty()) Config->OutputFile = "a.out"; + // Fail early if the output file or map file is not writable. If a user has a + // long link, e.g. due to a large LTO link, they do not wish to run it and + // find that it failed because there was a mistake in their command-line. + if (!isFileWritable(Config->OutputFile, "output file")) + return; + if (!isFileWritable(Config->MapFile, "map file")) + return; + // Use default entry point name if no name was given via the command // line nor linker scripts. For some reason, MIPS entry point name is // different from others. @@ -792,6 +920,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { if (ErrorCount) return; + // Some symbols (such as __ehdr_start) are defined lazily only when there + // are undefined symbols for them, so we add these to trigger that logic. 
+ for (StringRef Sym : Script->Opt.ReferencedSymbols) + Symtab.addUndefined(Sym); + for (auto *Arg : Args.filtered(OPT_wrap)) Symtab.wrap(Arg->getValue()); @@ -799,12 +932,12 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { // Beyond this point, no new files are added. // Aggregate all input sections into one place. for (elf::ObjectFile<ELFT> *F : Symtab.getObjectFiles()) - for (InputSectionBase<ELFT> *S : F->getSections()) - if (S && S != &InputSection<ELFT>::Discarded) - Symtab.Sections.push_back(S); + for (InputSectionBase *S : F->getSections()) + if (S && S != &InputSection::Discarded) + InputSections.push_back(S); for (BinaryFile *F : Symtab.getBinaryFiles()) - for (InputSectionData *S : F->getSections()) - Symtab.Sections.push_back(cast<InputSection<ELFT>>(S)); + for (InputSectionBase *S : F->getSections()) + InputSections.push_back(cast<InputSection>(S)); // Do size optimizations: garbage collection and identical code folding. if (Config->GcSections) @@ -814,15 +947,15 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) { // MergeInputSection::splitIntoPieces needs to be called before // any call of MergeInputSection::getOffset. Do that. - forEach(Symtab.Sections.begin(), Symtab.Sections.end(), - [](InputSectionBase<ELFT> *S) { - if (!S->Live) - return; - if (Decompressor::isCompressedELFSection(S->Flags, S->Name)) - S->uncompress(); - if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(S)) - MS->splitIntoPieces(); - }); + parallelForEach(InputSections.begin(), InputSections.end(), + [](InputSectionBase *S) { + if (!S->Live) + return; + if (Decompressor::isCompressedELFSection(S->Flags, S->Name)) + S->uncompress(); + if (auto *MS = dyn_cast<MergeInputSection>(S)) + MS->splitIntoPieces(); + }); // Write the result to the file. 
writeResult<ELFT>(); diff --git a/ELF/Driver.h b/ELF/Driver.h index 8bb2093e86ca..6a75a8942ca0 100644 --- a/ELF/Driver.h +++ b/ELF/Driver.h @@ -27,7 +27,7 @@ extern class LinkerDriver *Driver; class LinkerDriver { public: void main(ArrayRef<const char *> Args, bool CanExitEarly); - void addFile(StringRef Path); + void addFile(StringRef Path, bool WithLOption); void addLibrary(StringRef Name); private: diff --git a/ELF/DriverUtils.cpp b/ELF/DriverUtils.cpp index 3a20cd76efe2..f4eadeee9e43 100644 --- a/ELF/DriverUtils.cpp +++ b/ELF/DriverUtils.cpp @@ -16,7 +16,6 @@ #include "Driver.h" #include "Error.h" #include "Memory.h" -#include "ScriptParser.h" #include "lld/Config/Version.h" #include "lld/Core/Reproduce.h" #include "llvm/ADT/Optional.h" @@ -54,12 +53,10 @@ ELFOptTable::ELFOptTable() : OptTable(OptInfo) {} // Parse -color-diagnostics={auto,always,never} or -no-color-diagnostics. static bool getColorDiagnostics(opt::InputArgList &Args) { - bool Default = (ErrorOS == &errs() && Process::StandardErrHasColors()); - auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, OPT_no_color_diagnostics); if (!Arg) - return Default; + return ErrorOS->has_colors(); if (Arg->getOption().getID() == OPT_color_diagnostics) return true; if (Arg->getOption().getID() == OPT_no_color_diagnostics) @@ -67,7 +64,7 @@ static bool getColorDiagnostics(opt::InputArgList &Args) { StringRef S = Arg->getValue(); if (S == "auto") - return Default; + return ErrorOS->has_colors(); if (S == "always") return true; if (S != "never") @@ -120,6 +117,20 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> Argv) { void elf::printHelp(const char *Argv0) { ELFOptTable Table; Table.PrintHelp(outs(), Argv0, "lld", false); + outs() << "\n"; + + // Scripts generated by Libtool versions up to at least 2.4.6 (the most + // recent version as of March 2017) expect /: supported targets:.* elf/ + // in a message for the -help option. 
If it doesn't match, the scripts + // assume that the linker doesn't support very basic features such as + // shared libraries. Therefore, we need to print out at least "elf". + // Here, we print out all the targets that we support. + outs() << Argv0 << ": supported targets: " + << "elf32-i386 elf32-iamcu elf32-littlearm elf32-ntradbigmips " + << "elf32-ntradlittlemips elf32-powerpc elf32-tradbigmips " + << "elf32-tradlittlemips elf32-x86-64 " + << "elf64-amdgpu elf64-littleaarch64 elf64-powerpc elf64-tradbigmips " + << "elf64-tradlittlemips elf64-x86-64\n"; } // Reconstructs command line arguments so that so that you can re-run @@ -136,6 +147,13 @@ std::string elf::createResponseFile(const opt::InputArgList &Args) { case OPT_INPUT: OS << quote(rewritePath(Arg->getValue())) << "\n"; break; + case OPT_o: + // If -o path contains directories, "lld @response.txt" will likely + // fail because the archive we are creating doesn't contain empty + // directories for the output path (-o doesn't create directories). + // Strip directories to prevent the issue. 
+ OS << "-o " << quote(sys::path::filename(Arg->getValue())) << "\n"; + break; case OPT_L: case OPT_dynamic_list: case OPT_rpath: diff --git a/ELF/EhFrame.cpp b/ELF/EhFrame.cpp index 2428473d9012..90be30a5f0f9 100644 --- a/ELF/EhFrame.cpp +++ b/ELF/EhFrame.cpp @@ -38,13 +38,14 @@ using namespace lld::elf; namespace { template <class ELFT> class EhReader { public: - EhReader(InputSectionBase<ELFT> *S, ArrayRef<uint8_t> D) : IS(S), D(D) {} + EhReader(InputSectionBase *S, ArrayRef<uint8_t> D) : IS(S), D(D) {} size_t readEhRecordSize(); uint8_t getFdeEncoding(); private: template <class P> void failOn(const P *Loc, const Twine &Msg) { - fatal(IS->getLocation((const uint8_t *)Loc - IS->Data.data()) + ": " + Msg); + fatal("corrupted .eh_frame: " + Msg + "\n>>> defined in " + + IS->getObjMsg<ELFT>((const uint8_t *)Loc - IS->Data.data())); } uint8_t readByte(); @@ -53,15 +54,16 @@ private: void skipLeb128(); void skipAugP(); - InputSectionBase<ELFT> *IS; + InputSectionBase *IS; ArrayRef<uint8_t> D; }; } template <class ELFT> -size_t elf::readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off) { +size_t elf::readEhRecordSize(InputSectionBase *S, size_t Off) { return EhReader<ELFT>(S, S->Data.slice(Off)).readEhRecordSize(); } + // .eh_frame section is a sequence of records. Each record starts with // a 4 byte length field. This function reads the length. template <class ELFT> size_t EhReader<ELFT>::readEhRecordSize() { @@ -121,11 +123,11 @@ template <class ELFT> void EhReader<ELFT>::skipLeb128() { failOn(ErrPos, "corrupted CIE (failed to read LEB128)"); } -template <class ELFT> static size_t getAugPSize(unsigned Enc) { +static size_t getAugPSize(unsigned Enc) { switch (Enc & 0x0f) { case DW_EH_PE_absptr: case DW_EH_PE_signed: - return ELFT::Is64Bits ? 
8 : 4; + return Config->Wordsize; case DW_EH_PE_udata2: case DW_EH_PE_sdata2: return 2; @@ -143,7 +145,7 @@ template <class ELFT> void EhReader<ELFT>::skipAugP() { uint8_t Enc = readByte(); if ((Enc & 0xf0) == DW_EH_PE_aligned) failOn(D.data() - 1, "DW_EH_PE_aligned encoding is not supported"); - size_t Size = getAugPSize<ELFT>(Enc); + size_t Size = getAugPSize(Enc); if (Size == 0) failOn(D.data() - 1, "unknown FDE encoding"); if (Size >= D.size()) @@ -152,7 +154,7 @@ template <class ELFT> void EhReader<ELFT>::skipAugP() { } template <class ELFT> uint8_t elf::getFdeEncoding(EhSectionPiece *P) { - auto *IS = static_cast<InputSectionBase<ELFT> *>(P->ID); + auto *IS = static_cast<InputSectionBase *>(P->ID); return EhReader<ELFT>(IS, P->data()).getFdeEncoding(); } @@ -199,14 +201,10 @@ template <class ELFT> uint8_t EhReader<ELFT>::getFdeEncoding() { return DW_EH_PE_absptr; } -template size_t elf::readEhRecordSize<ELF32LE>(InputSectionBase<ELF32LE> *S, - size_t Off); -template size_t elf::readEhRecordSize<ELF32BE>(InputSectionBase<ELF32BE> *S, - size_t Off); -template size_t elf::readEhRecordSize<ELF64LE>(InputSectionBase<ELF64LE> *S, - size_t Off); -template size_t elf::readEhRecordSize<ELF64BE>(InputSectionBase<ELF64BE> *S, - size_t Off); +template size_t elf::readEhRecordSize<ELF32LE>(InputSectionBase *S, size_t Off); +template size_t elf::readEhRecordSize<ELF32BE>(InputSectionBase *S, size_t Off); +template size_t elf::readEhRecordSize<ELF64LE>(InputSectionBase *S, size_t Off); +template size_t elf::readEhRecordSize<ELF64BE>(InputSectionBase *S, size_t Off); template uint8_t elf::getFdeEncoding<ELF32LE>(EhSectionPiece *P); template uint8_t elf::getFdeEncoding<ELF32BE>(EhSectionPiece *P); diff --git a/ELF/EhFrame.h b/ELF/EhFrame.h index cadc93d3a2e4..4e2b6f83a294 100644 --- a/ELF/EhFrame.h +++ b/ELF/EhFrame.h @@ -14,11 +14,10 @@ namespace lld { namespace elf { -template <class ELFT> class InputSectionBase; +class InputSectionBase; struct EhSectionPiece; -template 
<class ELFT> -size_t readEhRecordSize(InputSectionBase<ELFT> *S, size_t Off); +template <class ELFT> size_t readEhRecordSize(InputSectionBase *S, size_t Off); template <class ELFT> uint8_t getFdeEncoding(EhSectionPiece *P); } } diff --git a/ELF/Error.cpp b/ELF/Error.cpp index d9b41f9c599e..2c61b58dfed5 100644 --- a/ELF/Error.cpp +++ b/ELF/Error.cpp @@ -20,10 +20,10 @@ #include <unistd.h> #endif -using namespace lld::elf; using namespace llvm; -namespace lld { +using namespace lld; +using namespace lld::elf; uint64_t elf::ErrorCount; raw_ostream *elf::ErrorOS; @@ -33,6 +33,18 @@ StringRef elf::Argv0; // but outs() or errs() are not thread-safe. We protect them using a mutex. static std::mutex Mu; +// Prints "\n" or does nothing, depending on Msg contents of +// the previous call of this function. +static void newline(const Twine &Msg) { + // True if the previous error message contained "\n". + // We want to separate multi-line error messages with a newline. + static bool Flag; + + if (Flag) + *ErrorOS << "\n"; + Flag = (StringRef(Msg.str()).find('\n') != StringRef::npos); +} + static void print(StringRef S, raw_ostream::Colors C) { *ErrorOS << Argv0 + ": "; if (Config->ColorDiagnostics) { @@ -45,9 +57,16 @@ static void print(StringRef S, raw_ostream::Colors C) { } void elf::log(const Twine &Msg) { - std::lock_guard<std::mutex> Lock(Mu); - if (Config->Verbose) + if (Config->Verbose) { + std::lock_guard<std::mutex> Lock(Mu); outs() << Argv0 << ": " << Msg << "\n"; + } +} + +void elf::message(const Twine &Msg) { + std::lock_guard<std::mutex> Lock(Mu); + outs() << Msg << "\n"; + outs().flush(); } void elf::warn(const Twine &Msg) { @@ -55,13 +74,16 @@ void elf::warn(const Twine &Msg) { error(Msg); return; } + std::lock_guard<std::mutex> Lock(Mu); + newline(Msg); print("warning: ", raw_ostream::MAGENTA); *ErrorOS << Msg << "\n"; } void elf::error(const Twine &Msg) { std::lock_guard<std::mutex> Lock(Mu); + newline(Msg); if (Config->ErrorLimit == 0 || ErrorCount < 
Config->ErrorLimit) { print("error: ", raw_ostream::RED); @@ -77,10 +99,6 @@ void elf::error(const Twine &Msg) { ++ErrorCount; } -void elf::error(std::error_code EC, const Twine &Prefix) { - error(Prefix + ": " + EC.message()); -} - void elf::exitLld(int Val) { // Dealloc/destroy ManagedStatic variables before calling // _exit(). In a non-LTO build, this is a nop. In an LTO @@ -93,18 +111,6 @@ void elf::exitLld(int Val) { } void elf::fatal(const Twine &Msg) { - std::lock_guard<std::mutex> Lock(Mu); - print("error: ", raw_ostream::RED); - *ErrorOS << Msg << "\n"; + error(Msg); exitLld(1); } - -void elf::fatal(std::error_code EC, const Twine &Prefix) { - fatal(Prefix + ": " + EC.message()); -} - -void elf::fatal(Error &E, const Twine &Prefix) { - fatal(Prefix + ": " + llvm::toString(std::move(E))); -} - -} // namespace lld diff --git a/ELF/Error.h b/ELF/Error.h index f18cf456da6d..dd6e37c99b15 100644 --- a/ELF/Error.h +++ b/ELF/Error.h @@ -15,10 +15,14 @@ // Error prints out an error message and increment a global variable // ErrorCount to record the fact that we met an error condition. It does // not exit, so it is safe for a lld-as-a-library use case. It is generally -// useful because it can report more than one errors in a single run. +// useful because it can report more than one error in a single run. // // Warn doesn't do anything but printing out a given message. // +// It is not recommended to use llvm::outs() or llvm::errs() directly +// in LLD because they are not thread-safe. The functions declared in +// this file are mutually excluded, so you want to use them instead. 
+// //===----------------------------------------------------------------------===// #ifndef LLD_ELF_ERROR_H @@ -36,15 +40,12 @@ extern llvm::raw_ostream *ErrorOS; extern llvm::StringRef Argv0; void log(const Twine &Msg); +void message(const Twine &Msg); void warn(const Twine &Msg); - void error(const Twine &Msg); -void error(std::error_code EC, const Twine &Prefix); +LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); LLVM_ATTRIBUTE_NORETURN void exitLld(int Val); -LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void fatal(std::error_code EC, const Twine &Prefix); -LLVM_ATTRIBUTE_NORETURN void fatal(Error &E, const Twine &Prefix); // check() functions are convenient functions to strip errors // from error-or-value objects. @@ -68,7 +69,7 @@ template <class T> T check(ErrorOr<T> E, const Twine &Prefix) { template <class T> T check(Expected<T> E, const Twine &Prefix) { if (!E) - fatal(Prefix + ": " + errorToErrorCode(E.takeError()).message()); + fatal(Prefix + ": " + toString(E.takeError())); return std::move(*E); } diff --git a/ELF/Filesystem.cpp b/ELF/Filesystem.cpp new file mode 100644 index 000000000000..75f7bda75a23 --- /dev/null +++ b/ELF/Filesystem.cpp @@ -0,0 +1,79 @@ +//===- Filesystem.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a few utility functions to handle files. +// +//===----------------------------------------------------------------------===// + +#include "Filesystem.h" +#include "Config.h" +#include "Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileOutputBuffer.h" +#include <thread> + +using namespace llvm; + +using namespace lld; +using namespace lld::elf; + +// Removes a given file asynchronously. 
This is a performance hack, +// so remove this when operating systems are improved. +// +// On Linux (and probably on other Unix-like systems), unlink(2) is a +// noticeably slow system call. As of 2016, unlink takes 250 +// milliseconds to remove a 1 GB file on ext4 filesystem on my machine. +// +// To create a new result file, we first remove the existing file. So, if +// you repeatedly link a 1 GB program in a regular compile-link-debug +// cycle, every cycle wastes 250 milliseconds only to remove a file. +// Since LLD can link a 1 GB binary in about 5 seconds, that waste +// actually counts. +// +// This function spawns a background thread to call unlink. +// The calling thread returns almost immediately. +void elf::unlinkAsync(StringRef Path) { + if (!Config->Threads || !sys::fs::exists(Config->OutputFile)) + return; + + // First, rename Path to avoid race condition. We cannot remove + // Path from a different thread because we are now going to create + // Path as a new file. If we do that in a different thread, the new + // thread can remove the new file. + SmallString<128> TempPath; + if (sys::fs::createUniqueFile(Path + "tmp%%%%%%%%", TempPath)) + return; + if (sys::fs::rename(Path, TempPath)) { + sys::fs::remove(TempPath); + return; + } + + // Remove TempPath in background. + std::thread([=] { ::remove(TempPath.str().str().c_str()); }).detach(); +} + +// Returns true if a given file seems to be writable. +// +// Determining whether a file is writable or not is amazingly hard, +// and after all the only reliable way of doing that is to actually +// create a file. But we don't want to do that in this function +// because LLD shouldn't update any file if it will end in a failure. +// We also don't want to reimplement heuristics. So we'll let +// FileOutputBuffer do the work. +// +// FileOutputBuffer doesn't touch a destination file until commit() +// is called. We use that class without calling commit() to predict +// if the given file is writable. 
+bool elf::isFileWritable(StringRef Path, StringRef Desc) { + if (auto EC = FileOutputBuffer::create(Path, 1).getError()) { + error("cannot open " + Desc + " " + Path + ": " + EC.message()); + return false; + } + return true; +} diff --git a/ELF/Filesystem.h b/ELF/Filesystem.h new file mode 100644 index 000000000000..a33dc3651a4a --- /dev/null +++ b/ELF/Filesystem.h @@ -0,0 +1,22 @@ +//===- Filesystem.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_FILESYSTEM_H +#define LLD_ELF_FILESYSTEM_H + +#include "lld/Core/LLVM.h" + +namespace lld { +namespace elf { +void unlinkAsync(StringRef Path); +bool isFileWritable(StringRef Path, StringRef FileDescription); +} +} + +#endif diff --git a/ELF/GdbIndex.cpp b/ELF/GdbIndex.cpp index 762144dd0a96..99e02d0025b0 100644 --- a/ELF/GdbIndex.cpp +++ b/ELF/GdbIndex.cpp @@ -7,199 +7,43 @@ // //===----------------------------------------------------------------------===// // -// File contains classes for implementation of --gdb-index command line option. +// The -gdb-index option instructs the linker to emit a .gdb_index section. +// The section contains information to make gdb startup faster. +// The format of the section is described at +// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html. // -// If that option is used, linker should emit a .gdb_index section that allows -// debugger to locate and read .dwo files, containing neccessary debug -// information. -// More information about implementation can be found in DWARF specification, -// latest version is available at http://dwarfstd.org. 
-// -// .gdb_index section format: -// (Information is based on/taken from -// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html (*)) -// -// A mapped index consists of several areas, laid out in order: -// 1) The file header. -// 2) "The CU (compilation unit) list. This is a sequence of pairs of 64-bit -// little-endian values, sorted by the CU offset. The first element in each -// pair is the offset of a CU in the .debug_info section. The second element -// in each pair is the length of that CU. References to a CU elsewhere in the -// map are done using a CU index, which is just the 0-based index into this -// table. Note that if there are type CUs, then conceptually CUs and type CUs -// form a single list for the purposes of CU indices."(*) -// 3) The types CU list. Depricated as .debug_types does not appear in the DWARF -// v5 specification. -// 4) The address area. The address area is a sequence of address -// entries, where each entrie contains low address, high address and CU -// index. -// 5) "The symbol table. This is an open-addressed hash table. The size of the -// hash table is always a power of 2. Each slot in the hash table consists of -// a pair of offset_type values. The first value is the offset of the -// symbol's name in the constant pool. The second value is the offset of the -// CU vector in the constant pool."(*) -// 6) "The constant pool. This is simply a bunch of bytes. It is organized so -// that alignment is correct: CU vectors are stored first, followed by -// strings." (*) -// -// For constructing the .gdb_index section following steps should be performed: -// 1) For file header nothing special should be done. It contains the offsets to -// the areas below. -// 2) Scan the compilation unit headers of the .debug_info sections to build a -// list of compilation units. -// 3) CU Types are no longer needed as DWARF skeleton type units never made it -// into the standard. 
lld does nothing to support parsing of .debug_types -// and generates empty types CU area in .gdb_index section. -// 4) Address area entries are extracted from DW_TAG_compile_unit DIEs of -// .debug_info sections. -// 5) For building the symbol table linker extracts the public names from the -// .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the -// hashtable in according to .gdb_index format specification. -// 6) Constant pool is populated at the same time as symbol table. //===----------------------------------------------------------------------===// #include "GdbIndex.h" +#include "Memory.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" #include "llvm/Object/ELFObjectFile.h" using namespace llvm; using namespace llvm::object; +using namespace lld; using namespace lld::elf; -template <class ELFT> -GdbIndexBuilder<ELFT>::GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec) - : DebugInfoSec(DebugInfoSec) { - if (Expected<std::unique_ptr<object::ObjectFile>> Obj = - object::ObjectFile::createObjectFile(DebugInfoSec->getFile()->MB)) - Dwarf.reset(new DWARFContextInMemory(*Obj.get(), this)); - else - error(toString(DebugInfoSec->getFile()) + ": error creating DWARF context"); -} - -template <class ELFT> -std::vector<std::pair<typename ELFT::uint, typename ELFT::uint>> -GdbIndexBuilder<ELFT>::readCUList() { - std::vector<std::pair<uintX_t, uintX_t>> Ret; - for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf->compile_units()) - Ret.push_back( - {DebugInfoSec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); - return Ret; -} - -template <class ELFT> -std::vector<std::pair<StringRef, uint8_t>> -GdbIndexBuilder<ELFT>::readPubNamesAndTypes() { - const bool IsLE = ELFT::TargetEndianness == llvm::support::little; - StringRef Data[] = {Dwarf->getGnuPubNamesSection(), - Dwarf->getGnuPubTypesSection()}; - - std::vector<std::pair<StringRef, uint8_t>> Ret; - for (StringRef D : Data) { - DWARFDebugPubTable PubTable(D, IsLE, true); - for (const 
DWARFDebugPubTable::Set &S : PubTable.getData()) - for (const DWARFDebugPubTable::Entry &E : S.Entries) - Ret.push_back({E.Name, E.Descriptor.toBits()}); - } - return Ret; -} - std::pair<bool, GdbSymbol *> GdbHashTab::add(uint32_t Hash, size_t Offset) { - if (Size * 4 / 3 >= Table.size()) - expand(); - - GdbSymbol **Slot = findSlot(Hash, Offset); - bool New = false; - if (*Slot == nullptr) { - ++Size; - *Slot = new (Alloc) GdbSymbol(Hash, Offset); - New = true; + GdbSymbol *&Sym = Map[Offset]; + if (Sym) + return {false, Sym}; + Sym = make<GdbSymbol>(Hash, Offset); + return {true, Sym}; +} + +void GdbHashTab::finalizeContents() { + uint32_t Size = std::max<uint32_t>(1024, NextPowerOf2(Map.size() * 4 / 3)); + uint32_t Mask = Size - 1; + Table.resize(Size); + + for (auto &P : Map) { + GdbSymbol *Sym = P.second; + uint32_t I = Sym->NameHash & Mask; + uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1; + + while (Table[I]) + I = (I + Step) & Mask; + Table[I] = Sym; } - return {New, *Slot}; -} - -void GdbHashTab::expand() { - if (Table.empty()) { - Table.resize(InitialSize); - return; - } - std::vector<GdbSymbol *> NewTable(Table.size() * 2); - NewTable.swap(Table); - - for (GdbSymbol *Sym : NewTable) { - if (!Sym) - continue; - GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); - *Slot = Sym; - } -} - -// Methods finds a slot for symbol with given hash. The step size used to find -// the next candidate slot when handling a hash collision is specified in -// .gdb_index section format. The hash value for a table entry is computed by -// applying an iterative hash function to the symbol's name. 
-GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { - uint32_t Index = Hash & (Table.size() - 1); - uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; - - for (;;) { - GdbSymbol *S = Table[Index]; - if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) - return &Table[Index]; - Index = (Index + Step) & (Table.size() - 1); - } -} - -template <class ELFT> -static InputSectionBase<ELFT> * -findSection(ArrayRef<InputSectionBase<ELFT> *> Arr, uint64_t Offset) { - for (InputSectionBase<ELFT> *S : Arr) - if (S && S != &InputSection<ELFT>::Discarded) - if (Offset >= S->Offset && Offset < S->Offset + S->getSize()) - return S; - return nullptr; -} - -template <class ELFT> -std::vector<AddressEntry<ELFT>> -GdbIndexBuilder<ELFT>::readAddressArea(size_t CurrentCU) { - std::vector<AddressEntry<ELFT>> Ret; - for (const auto &CU : Dwarf->compile_units()) { - DWARFAddressRangesVector Ranges; - CU->collectAddressRanges(Ranges); - - ArrayRef<InputSectionBase<ELFT> *> Sections = - DebugInfoSec->getFile()->getSections(); - - for (std::pair<uint64_t, uint64_t> &R : Ranges) - if (InputSectionBase<ELFT> *S = findSection(Sections, R.first)) - Ret.push_back( - {S, R.first - S->Offset, R.second - S->Offset, CurrentCU}); - ++CurrentCU; - } - return Ret; -} - -// We return file offset as load address for allocatable sections. That is -// currently used for collecting address ranges in readAddressArea(). We are -// able then to find section index that range belongs to. 
-template <class ELFT> -uint64_t GdbIndexBuilder<ELFT>::getSectionLoadAddress( - const object::SectionRef &Sec) const { - if (static_cast<const ELFSectionRef &>(Sec).getFlags() & ELF::SHF_ALLOC) - return static_cast<const ELFSectionRef &>(Sec).getOffset(); - return 0; -} - -template <class ELFT> -std::unique_ptr<LoadedObjectInfo> GdbIndexBuilder<ELFT>::clone() const { - return {}; -} - -namespace lld { -namespace elf { -template class GdbIndexBuilder<ELF32LE>; -template class GdbIndexBuilder<ELF32BE>; -template class GdbIndexBuilder<ELF64LE>; -template class GdbIndexBuilder<ELF64BE>; -} } diff --git a/ELF/GdbIndex.h b/ELF/GdbIndex.h index c761ea173a8d..a36b92714def 100644 --- a/ELF/GdbIndex.h +++ b/ELF/GdbIndex.h @@ -17,48 +17,16 @@ namespace lld { namespace elf { -template <class ELFT> class InputSection; +class InputSection; // Struct represents single entry of address area of gdb index. -template <class ELFT> struct AddressEntry { - InputSectionBase<ELFT> *Section; +struct AddressEntry { + InputSectionBase *Section; uint64_t LowAddress; uint64_t HighAddress; size_t CuIndex; }; -// GdbIndexBuilder is a helper class used for extracting data required -// for building .gdb_index section from objects. -template <class ELFT> class GdbIndexBuilder : public llvm::LoadedObjectInfo { - typedef typename ELFT::uint uintX_t; - - InputSection<ELFT> *DebugInfoSec; - - std::unique_ptr<llvm::DWARFContext> Dwarf; - -public: - GdbIndexBuilder(InputSection<ELFT> *DebugInfoSec); - - // Extracts the compilation units. Each first element of pair is a offset of a - // CU in the .debug_info section and second is the length of that CU. - std::vector<std::pair<uintX_t, uintX_t>> readCUList(); - - // Extracts the vector of address area entries. Accepts global index of last - // parsed CU. - std::vector<AddressEntry<ELFT>> readAddressArea(size_t CurrentCU); - - // Method extracts public names and types. It returns list of name and - // gnu_pub* kind pairs. 
- std::vector<std::pair<StringRef, uint8_t>> readPubNamesAndTypes(); - -private: - // Method returns section file offset as a load addres for DWARF parser. That - // allows to find the target section index for address ranges. - uint64_t - getSectionLoadAddress(const llvm::object::SectionRef &Sec) const override; - std::unique_ptr<llvm::LoadedObjectInfo> clone() const override; -}; - // Element of GdbHashTab hash table. struct GdbSymbol { GdbSymbol(uint32_t Hash, size_t Offset) @@ -75,22 +43,13 @@ class GdbHashTab final { public: std::pair<bool, GdbSymbol *> add(uint32_t Hash, size_t Offset); + void finalizeContents(); size_t getCapacity() { return Table.size(); } GdbSymbol *getSymbol(size_t I) { return Table[I]; } private: - void expand(); - - GdbSymbol **findSlot(uint32_t Hash, size_t Offset); - - llvm::BumpPtrAllocator Alloc; + llvm::DenseMap<size_t, GdbSymbol *> Map; std::vector<GdbSymbol *> Table; - - // Size keeps the amount of filled entries in Table. - size_t Size = 0; - - // Initial size must be a power of 2. 
- static const int32_t InitialSize = 1024; }; } // namespace elf diff --git a/ELF/ICF.cpp b/ELF/ICF.cpp index 32cd0f8a185c..dcf01ea80011 100644 --- a/ELF/ICF.cpp +++ b/ELF/ICF.cpp @@ -77,7 +77,6 @@ #include "Config.h" #include "SymbolTable.h" #include "Threads.h" - #include "llvm/ADT/Hashing.h" #include "llvm/Object/ELF.h" #include "llvm/Support/ELF.h" @@ -102,11 +101,11 @@ private: bool constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB); template <class RelTy> - bool variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, - const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB); + bool variableEq(const InputSection *A, ArrayRef<RelTy> RelsA, + const InputSection *B, ArrayRef<RelTy> RelsB); - bool equalsConstant(const InputSection<ELFT> *A, const InputSection<ELFT> *B); - bool equalsVariable(const InputSection<ELFT> *A, const InputSection<ELFT> *B); + bool equalsConstant(const InputSection *A, const InputSection *B); + bool equalsVariable(const InputSection *A, const InputSection *B); size_t findBoundary(size_t Begin, size_t End); @@ -115,7 +114,7 @@ private: void forEachClass(std::function<void(size_t, size_t)> Fn); - std::vector<InputSection<ELFT> *> Sections; + std::vector<InputSection *> Sections; // We repeat the main loop while `Repeat` is true. std::atomic<bool> Repeat; @@ -154,17 +153,17 @@ private: // Returns a hash value for S. Note that the information about // relocation targets is not included in the hash value. -template <class ELFT> static uint32_t getHash(InputSection<ELFT> *S) { +template <class ELFT> static uint32_t getHash(InputSection *S) { return hash_combine(S->Flags, S->getSize(), S->NumRelocations); } // Returns true if section S is subject of ICF. -template <class ELFT> static bool isEligible(InputSection<ELFT> *S) { +static bool isEligible(InputSection *S) { // .init and .fini contains instructions that must be executed to // initialize and finalize the process. They cannot and should not // be merged. 
- return S->Live && (S->Flags & SHF_ALLOC) && !(S->Flags & SHF_WRITE) && - S->Name != ".init" && S->Name != ".fini"; + return S->Live && (S->Flags & SHF_ALLOC) && (S->Flags & SHF_EXECINSTR) && + !(S->Flags & SHF_WRITE) && S->Name != ".init" && S->Name != ".fini"; } // Split an equivalence class into smaller classes. @@ -181,17 +180,17 @@ void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) { while (Begin < End) { // Divide [Begin, End) into two. Let Mid be the start index of the // second group. - auto Bound = std::stable_partition( - Sections.begin() + Begin + 1, Sections.begin() + End, - [&](InputSection<ELFT> *S) { - if (Constant) - return equalsConstant(Sections[Begin], S); - return equalsVariable(Sections[Begin], S); - }); + auto Bound = + std::stable_partition(Sections.begin() + Begin + 1, + Sections.begin() + End, [&](InputSection *S) { + if (Constant) + return equalsConstant(Sections[Begin], S); + return equalsVariable(Sections[Begin], S); + }); size_t Mid = Bound - Sections.begin(); // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by - // updating the sections in [Begin, End). We use Mid as an equivalence + // updating the sections in [Begin, Mid). We use Mid as an equivalence // class ID because every group ends with a unique index. for (size_t I = Begin; I < Mid; ++I) Sections[I]->Class[Next] = Mid; @@ -210,7 +209,7 @@ template <class RelTy> bool ICF<ELFT>::constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { auto Eq = [](const RelTy &A, const RelTy &B) { return A.r_offset == B.r_offset && - A.getType(Config->Mips64EL) == B.getType(Config->Mips64EL) && + A.getType(Config->IsMips64EL) == B.getType(Config->IsMips64EL) && getAddend<ELFT>(A) == getAddend<ELFT>(B); }; @@ -221,40 +220,43 @@ bool ICF<ELFT>::constantEq(ArrayRef<RelTy> RelsA, ArrayRef<RelTy> RelsB) { // Compare "non-moving" part of two InputSections, namely everything // except relocation targets. 
template <class ELFT> -bool ICF<ELFT>::equalsConstant(const InputSection<ELFT> *A, - const InputSection<ELFT> *B) { +bool ICF<ELFT>::equalsConstant(const InputSection *A, const InputSection *B) { if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags || A->getSize() != B->getSize() || A->Data != B->Data) return false; if (A->AreRelocsRela) - return constantEq(A->relas(), B->relas()); - return constantEq(A->rels(), B->rels()); + return constantEq(A->template relas<ELFT>(), B->template relas<ELFT>()); + return constantEq(A->template rels<ELFT>(), B->template rels<ELFT>()); } // Compare two lists of relocations. Returns true if all pairs of // relocations point to the same section in terms of ICF. template <class ELFT> template <class RelTy> -bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, - const InputSection<ELFT> *B, ArrayRef<RelTy> RelsB) { +bool ICF<ELFT>::variableEq(const InputSection *A, ArrayRef<RelTy> RelsA, + const InputSection *B, ArrayRef<RelTy> RelsB) { auto Eq = [&](const RelTy &RA, const RelTy &RB) { // The two sections must be identical. - SymbolBody &SA = A->getFile()->getRelocTargetSym(RA); - SymbolBody &SB = B->getFile()->getRelocTargetSym(RB); + SymbolBody &SA = A->template getFile<ELFT>()->getRelocTargetSym(RA); + SymbolBody &SB = B->template getFile<ELFT>()->getRelocTargetSym(RB); if (&SA == &SB) return true; - // Or, the two sections must be in the same equivalence class. - auto *DA = dyn_cast<DefinedRegular<ELFT>>(&SA); - auto *DB = dyn_cast<DefinedRegular<ELFT>>(&SB); + auto *DA = dyn_cast<DefinedRegular>(&SA); + auto *DB = dyn_cast<DefinedRegular>(&SB); if (!DA || !DB) return false; if (DA->Value != DB->Value) return false; - auto *X = dyn_cast<InputSection<ELFT>>(DA->Section); - auto *Y = dyn_cast<InputSection<ELFT>>(DB->Section); + // Either both symbols must be absolute... 
+ if (!DA->Section || !DB->Section) + return !DA->Section && !DB->Section; + + // Or the two sections must be in the same equivalence class. + auto *X = dyn_cast<InputSection>(DA->Section); + auto *Y = dyn_cast<InputSection>(DB->Section); if (!X || !Y) return false; @@ -271,11 +273,11 @@ bool ICF<ELFT>::variableEq(const InputSection<ELFT> *A, ArrayRef<RelTy> RelsA, // Compare "moving" part of two InputSections, namely relocation targets. template <class ELFT> -bool ICF<ELFT>::equalsVariable(const InputSection<ELFT> *A, - const InputSection<ELFT> *B) { +bool ICF<ELFT>::equalsVariable(const InputSection *A, const InputSection *B) { if (A->AreRelocsRela) - return variableEq(A, A->relas(), B, B->relas()); - return variableEq(A, A->rels(), B, B->rels()); + return variableEq(A, A->template relas<ELFT>(), B, + B->template relas<ELFT>()); + return variableEq(A, A->template rels<ELFT>(), B, B->template rels<ELFT>()); } template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { @@ -291,7 +293,7 @@ template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) { // groups of sections, grouped by the class. // // This function calls Fn on every group that starts within [Begin, End). -// Note that a group must starts in that range but doesn't necessarily +// Note that a group must start in that range but doesn't necessarily // have to end before End. template <class ELFT> void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End, @@ -323,8 +325,9 @@ void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) { // Split sections into 256 shards and call Fn in parallel. 
size_t NumShards = 256; size_t Step = Sections.size() / NumShards; - forLoop(0, NumShards, - [&](size_t I) { forEachClassRange(I * Step, (I + 1) * Step, Fn); }); + parallelFor(0, NumShards, [&](size_t I) { + forEachClassRange(I * Step, (I + 1) * Step, Fn); + }); forEachClassRange(Step * NumShards, Sections.size(), Fn); ++Cnt; } @@ -332,20 +335,20 @@ void ICF<ELFT>::forEachClass(std::function<void(size_t, size_t)> Fn) { // The main function of ICF. template <class ELFT> void ICF<ELFT>::run() { // Collect sections to merge. - for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) - if (auto *S = dyn_cast<InputSection<ELFT>>(Sec)) + for (InputSectionBase *Sec : InputSections) + if (auto *S = dyn_cast<InputSection>(Sec)) if (isEligible(S)) Sections.push_back(S); // Initially, we use hash values to partition sections. - for (InputSection<ELFT> *S : Sections) + for (InputSection *S : Sections) // Set MSB to 1 to avoid collisions with non-hash IDs. - S->Class[0] = getHash(S) | (1 << 31); + S->Class[0] = getHash<ELFT>(S) | (1 << 31); // From now on, sections in Sections vector are ordered so that sections // in the same equivalence class are consecutive in the vector. std::stable_sort(Sections.begin(), Sections.end(), - [](InputSection<ELFT> *A, InputSection<ELFT> *B) { + [](InputSection *A, InputSection *B) { return A->Class[0] < B->Class[0]; }); @@ -372,6 +375,15 @@ template <class ELFT> void ICF<ELFT>::run() { Sections[Begin]->replace(Sections[I]); } }); + + // Mark ARM Exception Index table sections that refer to folded code + // sections as not live. These sections have an implict dependency + // via the link order dependency. + if (Config->EMachine == EM_ARM) + for (InputSectionBase *Sec : InputSections) + if (auto *S = dyn_cast<InputSection>(Sec)) + if (S->Flags & SHF_LINK_ORDER) + S->Live = S->getLinkOrderDep()->Live; } // ICF entry point function. 
diff --git a/ELF/InputFiles.cpp b/ELF/InputFiles.cpp index f3afb1c34562..d651fbcad253 100644 --- a/ELF/InputFiles.cpp +++ b/ELF/InputFiles.cpp @@ -16,7 +16,6 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/IR/LLVMContext.h" @@ -38,6 +37,8 @@ using namespace lld::elf; TarWriter *elf::Tar; +InputFile::InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + namespace { // In ELF object file all section addresses are zero. If we have multiple // .text sections (when using -ffunction-section or comdat group) then @@ -56,14 +57,13 @@ public: } Optional<MemoryBufferRef> elf::readFile(StringRef Path) { - if (Config->Verbose) - outs() << Path << "\n"; - + log(Path); auto MBOrErr = MemoryBuffer::getFile(Path); if (auto EC = MBOrErr.getError()) { - error(EC, "cannot open " + Path); + error("cannot open " + Path + ": " + EC.message()); return None; } + std::unique_ptr<MemoryBuffer> &MB = *MBOrErr; MemoryBufferRef MBRef = MB->getMemBufferRef(); make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take MB ownership @@ -75,15 +75,13 @@ Optional<MemoryBufferRef> elf::readFile(StringRef Path) { template <class ELFT> void elf::ObjectFile<ELFT>::initializeDwarfLine() { std::unique_ptr<object::ObjectFile> Obj = - check(object::ObjectFile::createObjectFile(this->MB), - "createObjectFile failed"); + check(object::ObjectFile::createObjectFile(this->MB), toString(this)); ObjectInfo ObjInfo; DWARFContextInMemory Dwarf(*Obj, &ObjInfo); DwarfLine.reset(new DWARFDebugLine(&Dwarf.getLineSection().Relocs)); - DataExtractor LineData(Dwarf.getLineSection().Data, - ELFT::TargetEndianness == support::little, - ELFT::Is64Bits ? 
8 : 4); + DataExtractor LineData(Dwarf.getLineSection().Data, Config->IsLE, + Config->Wordsize); // The second parameter is offset in .debug_line section // for compilation unit (CU) of interest. We have only one @@ -94,34 +92,49 @@ template <class ELFT> void elf::ObjectFile<ELFT>::initializeDwarfLine() { // Returns source line information for a given offset // using DWARF debug info. template <class ELFT> -std::string elf::ObjectFile<ELFT>::getLineInfo(InputSectionBase<ELFT> *S, - uintX_t Offset) { +Optional<DILineInfo> elf::ObjectFile<ELFT>::getDILineInfo(InputSectionBase *S, + uint64_t Offset) { if (!DwarfLine) initializeDwarfLine(); // The offset to CU is 0. const DWARFDebugLine::LineTable *Tbl = DwarfLine->getLineTable(0); if (!Tbl) - return ""; + return None; // Use fake address calcuated by adding section file offset and offset in // section. See comments for ObjectInfo class. DILineInfo Info; Tbl->getFileLineInfoForAddress( - S->Offset + Offset, nullptr, + S->getOffsetInFile() + Offset, nullptr, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Info); if (Info.Line == 0) - return ""; - return Info.FileName + ":" + std::to_string(Info.Line); + return None; + return Info; +} + +// Returns source line information for a given offset +// using DWARF debug info. +template <class ELFT> +std::string elf::ObjectFile<ELFT>::getLineInfo(InputSectionBase *S, + uint64_t Offset) { + if (Optional<DILineInfo> Info = getDILineInfo(S, Offset)) + return Info->FileName + ":" + std::to_string(Info->Line); + return ""; } // Returns "(internal)", "foo.a(bar.o)" or "baz.o". 
std::string lld::toString(const InputFile *F) { if (!F) return "(internal)"; - if (!F->ArchiveName.empty()) - return (F->ArchiveName + "(" + F->getName() + ")").str(); - return F->getName(); + + if (F->ToStringCache.empty()) { + if (F->ArchiveName.empty()) + F->ToStringCache = F->getName(); + else + F->ToStringCache = (F->ArchiveName + "(" + F->getName() + ")").str(); + } + return F->ToStringCache; } template <class ELFT> static ELFKind getELFKind() { @@ -144,18 +157,20 @@ typename ELFT::SymRange ELFFileBase<ELFT>::getGlobalSymbols() { template <class ELFT> uint32_t ELFFileBase<ELFT>::getSectionIndex(const Elf_Sym &Sym) const { - return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX)); + return check(getObj().getSectionIndex(&Sym, Symbols, SymtabSHNDX), + toString(this)); } template <class ELFT> void ELFFileBase<ELFT>::initSymtab(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr *Symtab) { FirstNonLocal = Symtab->sh_info; - Symbols = check(getObj().symbols(Symtab)); + Symbols = check(getObj().symbols(Symtab), toString(this)); if (FirstNonLocal == 0 || FirstNonLocal > Symbols.size()) fatal(toString(this) + ": invalid sh_info in symbol table"); - StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections)); + StringTable = check(getObj().getStringTableForSymtab(*Symtab, Sections), + toString(this)); } template <class ELFT> @@ -163,11 +178,6 @@ elf::ObjectFile<ELFT>::ObjectFile(MemoryBufferRef M) : ELFFileBase<ELFT>(Base::ObjectKind, M) {} template <class ELFT> -ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getNonLocalSymbols() { - return makeArrayRef(this->SymbolBodies).slice(this->FirstNonLocal); -} - -template <class ELFT> ArrayRef<SymbolBody *> elf::ObjectFile<ELFT>::getLocalSymbols() { if (this->SymbolBodies.empty()) return this->SymbolBodies; @@ -196,19 +206,20 @@ StringRef elf::ObjectFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr &Sec) { if (this->Symbols.empty()) - this->initSymtab(Sections, - 
check(object::getSection<ELFT>(Sections, Sec.sh_link))); - const Elf_Sym *Sym = - check(object::getSymbol<ELFT>(this->Symbols, Sec.sh_info)); - return check(Sym->getName(this->StringTable)); + this->initSymtab( + Sections, + check(object::getSection<ELFT>(Sections, Sec.sh_link), toString(this))); + const Elf_Sym *Sym = check( + object::getSymbol<ELFT>(this->Symbols, Sec.sh_info), toString(this)); + return check(Sym->getName(this->StringTable), toString(this)); } template <class ELFT> ArrayRef<typename elf::ObjectFile<ELFT>::Elf_Word> elf::ObjectFile<ELFT>::getShtGroupEntries(const Elf_Shdr &Sec) { const ELFFile<ELFT> &Obj = this->getObj(); - ArrayRef<Elf_Word> Entries = - check(Obj.template getSectionContentsAsArray<Elf_Word>(&Sec)); + ArrayRef<Elf_Word> Entries = check( + Obj.template getSectionContentsAsArray<Elf_Word>(&Sec), toString(this)); if (Entries.empty() || Entries[0] != GRP_COMDAT) fatal(toString(this) + ": unsupported SHT_GROUP format"); return Entries.slice(1); @@ -242,14 +253,14 @@ bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { // the section does not hold a table of fixed-size entries". We know // that Rust 1.13 produces a string mergeable section with a zero // sh_entsize. Here we just accept it rather than being picky about it. 
- uintX_t EntSize = Sec.sh_entsize; + uint64_t EntSize = Sec.sh_entsize; if (EntSize == 0) return false; if (Sec.sh_size % EntSize) fatal(toString(this) + ": SHF_MERGE section size must be a multiple of sh_entsize"); - uintX_t Flags = Sec.sh_flags; + uint64_t Flags = Sec.sh_flags; if (!(Flags & SHF_MERGE)) return false; if (Flags & SHF_WRITE) @@ -270,76 +281,79 @@ bool elf::ObjectFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) { template <class ELFT> void elf::ObjectFile<ELFT>::initializeSections( DenseSet<CachedHashStringRef> &ComdatGroups) { - ArrayRef<Elf_Shdr> ObjSections = check(this->getObj().sections()); + ArrayRef<Elf_Shdr> ObjSections = + check(this->getObj().sections(), toString(this)); const ELFFile<ELFT> &Obj = this->getObj(); uint64_t Size = ObjSections.size(); - Sections.resize(Size); + this->Sections.resize(Size); unsigned I = -1; - StringRef SectionStringTable = check(Obj.getSectionStringTable(ObjSections)); + StringRef SectionStringTable = + check(Obj.getSectionStringTable(ObjSections), toString(this)); for (const Elf_Shdr &Sec : ObjSections) { ++I; - if (Sections[I] == &InputSection<ELFT>::Discarded) + if (this->Sections[I] == &InputSection::Discarded) continue; // SHF_EXCLUDE'ed sections are discarded by the linker. However, // if -r is given, we'll let the final link discard such sections. // This is compatible with GNU. 
if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) { - Sections[I] = &InputSection<ELFT>::Discarded; + this->Sections[I] = &InputSection::Discarded; continue; } switch (Sec.sh_type) { case SHT_GROUP: - Sections[I] = &InputSection<ELFT>::Discarded; - if (ComdatGroups.insert(CachedHashStringRef( - getShtGroupSignature(ObjSections, Sec))) + this->Sections[I] = &InputSection::Discarded; + if (ComdatGroups + .insert( + CachedHashStringRef(getShtGroupSignature(ObjSections, Sec))) .second) continue; for (uint32_t SecIndex : getShtGroupEntries(Sec)) { if (SecIndex >= Size) - fatal(toString(this) + ": invalid section index in group: " + - Twine(SecIndex)); - Sections[SecIndex] = &InputSection<ELFT>::Discarded; + fatal(toString(this) + + ": invalid section index in group: " + Twine(SecIndex)); + this->Sections[SecIndex] = &InputSection::Discarded; } break; case SHT_SYMTAB: this->initSymtab(ObjSections, &Sec); break; case SHT_SYMTAB_SHNDX: - this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, ObjSections)); + this->SymtabSHNDX = + check(Obj.getSHNDXTable(Sec, ObjSections), toString(this)); break; case SHT_STRTAB: case SHT_NULL: break; default: - Sections[I] = createInputSection(Sec, SectionStringTable); + this->Sections[I] = createInputSection(Sec, SectionStringTable); } // .ARM.exidx sections have a reverse dependency on the InputSection they // have a SHF_LINK_ORDER dependency, this is identified by the sh_link. 
if (Sec.sh_flags & SHF_LINK_ORDER) { - if (Sec.sh_link >= Sections.size()) + if (Sec.sh_link >= this->Sections.size()) fatal(toString(this) + ": invalid sh_link index: " + Twine(Sec.sh_link)); - auto *IS = cast<InputSection<ELFT>>(Sections[Sec.sh_link]); - IS->DependentSection = Sections[I]; + this->Sections[Sec.sh_link]->DependentSections.push_back( + this->Sections[I]); } } } template <class ELFT> -InputSectionBase<ELFT> * -elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { +InputSectionBase *elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { uint32_t Idx = Sec.sh_info; - if (Idx >= Sections.size()) + if (Idx >= this->Sections.size()) fatal(toString(this) + ": invalid relocated section index: " + Twine(Idx)); - InputSectionBase<ELFT> *Target = Sections[Idx]; + InputSectionBase *Target = this->Sections[Idx]; // Strictly speaking, a relocation section must be included in the // group of the section it relocates. However, LLVM 3.3 and earlier // would fail to do so, so we gracefully handle that case. - if (Target == &InputSection<ELFT>::Discarded) + if (Target == &InputSection::Discarded) return nullptr; if (!Target) @@ -348,11 +362,11 @@ elf::ObjectFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) { } template <class ELFT> -InputSectionBase<ELFT> * +InputSectionBase * elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, StringRef SectionStringTable) { - StringRef Name = - check(this->getObj().getSectionName(&Sec, SectionStringTable)); + StringRef Name = check( + this->getObj().getSectionName(&Sec, SectionStringTable), toString(this)); switch (Sec.sh_type) { case SHT_ARM_ATTRIBUTES: @@ -361,62 +375,91 @@ elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, // attribute section for dlopen to work. // In a full implementation we would merge all attribute sections. 
if (In<ELFT>::ARMAttributes == nullptr) { - In<ELFT>::ARMAttributes = make<InputSection<ELFT>>(this, &Sec, Name); + In<ELFT>::ARMAttributes = make<InputSection>(this, &Sec, Name); return In<ELFT>::ARMAttributes; } - return &InputSection<ELFT>::Discarded; + return &InputSection::Discarded; case SHT_RELA: case SHT_REL: { + // Find the relocation target section and associate this + // section with it. Target can be discarded, for example + // if it is a duplicated member of SHT_GROUP section, we + // do not create or proccess relocatable sections then. + InputSectionBase *Target = getRelocTarget(Sec); + if (!Target) + return nullptr; + // This section contains relocation information. // If -r is given, we do not interpret or apply relocation // but just copy relocation sections to output. if (Config->Relocatable) - return make<InputSection<ELFT>>(this, &Sec, Name); + return make<InputSection>(this, &Sec, Name); - // Find the relocation target section and associate this - // section with it. 
- InputSectionBase<ELFT> *Target = getRelocTarget(Sec); - if (!Target) - return nullptr; if (Target->FirstRelocation) fatal(toString(this) + ": multiple relocation sections to one section are not supported"); - if (!isa<InputSection<ELFT>>(Target) && !isa<EhInputSection<ELFT>>(Target)) + if (isa<MergeInputSection>(Target)) fatal(toString(this) + ": relocations pointing to SHF_MERGE are not supported"); size_t NumRelocations; if (Sec.sh_type == SHT_RELA) { - ArrayRef<Elf_Rela> Rels = check(this->getObj().relas(&Sec)); + ArrayRef<Elf_Rela> Rels = + check(this->getObj().relas(&Sec), toString(this)); Target->FirstRelocation = Rels.begin(); NumRelocations = Rels.size(); Target->AreRelocsRela = true; } else { - ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec)); + ArrayRef<Elf_Rel> Rels = check(this->getObj().rels(&Sec), toString(this)); Target->FirstRelocation = Rels.begin(); NumRelocations = Rels.size(); Target->AreRelocsRela = false; } assert(isUInt<31>(NumRelocations)); Target->NumRelocations = NumRelocations; + + // Relocation sections processed by the linker are usually removed + // from the output, so returning `nullptr` for the normal case. + // However, if -emit-relocs is given, we need to leave them in the output. + // (Some post link analysis tools need this information.) + if (Config->EmitRelocs) { + InputSection *RelocSec = make<InputSection>(this, &Sec, Name); + // We will not emit relocation section if target was discarded. + Target->DependentSections.push_back(RelocSec); + return RelocSec; + } return nullptr; } } - // .note.GNU-stack is a marker section to control the presence of - // PT_GNU_STACK segment in outputs. Since the presence of the segment - // is controlled only by the command line option (-z execstack) in LLD, - // .note.GNU-stack is ignored. + // The GNU linker uses .note.GNU-stack section as a marker indicating + // that the code in the object file does not expect that the stack is + // executable (in terms of NX bit). 
If all input files have the marker, + // the GNU linker adds a PT_GNU_STACK segment to tells the loader to + // make the stack non-executable. Most object files have this section as + // of 2017. + // + // But making the stack non-executable is a norm today for security + // reasons. Failure to do so may result in a serious security issue. + // Therefore, we make LLD always add PT_GNU_STACK unless it is + // explicitly told to do otherwise (by -z execstack). Because the stack + // executable-ness is controlled solely by command line options, + // .note.GNU-stack sections are simply ignored. if (Name == ".note.GNU-stack") - return &InputSection<ELFT>::Discarded; + return &InputSection::Discarded; + // Split stacks is a feature to support a discontiguous stack. At least + // as of 2017, it seems that the feature is not being used widely. + // Only GNU gold supports that. We don't. For the details about that, + // see https://gcc.gnu.org/wiki/SplitStacks if (Name == ".note.GNU-split-stack") { - error("objects using splitstacks are not supported"); - return &InputSection<ELFT>::Discarded; + error(toString(this) + + ": object file compiled with -fsplit-stack is not supported"); + return &InputSection::Discarded; } if (Config->Strip != StripPolicy::None && Name.startswith(".debug")) - return &InputSection<ELFT>::Discarded; + return &InputSection::Discarded; // The linkonce feature is a sort of proto-comdat. Some glibc i386 object // files contain definitions of symbol "__x86.get_pc_thunk.bx" in linkonce @@ -424,17 +467,17 @@ elf::ObjectFile<ELFT>::createInputSection(const Elf_Shdr &Sec, // FIXME: This is glibc PR20543, we should remove this hack once that has been // fixed for a while. if (Name.startswith(".gnu.linkonce.")) - return &InputSection<ELFT>::Discarded; + return &InputSection::Discarded; // The linker merges EH (exception handling) frames and creates a // .eh_frame_hdr section for runtime. So we handle them with a special // class. 
For relocatable outputs, they are just passed through. if (Name == ".eh_frame" && !Config->Relocatable) - return make<EhInputSection<ELFT>>(this, &Sec, Name); + return make<EhInputSection>(this, &Sec, Name); if (shouldMerge(Sec)) - return make<MergeInputSection<ELFT>>(this, &Sec, Name); - return make<InputSection<ELFT>>(this, &Sec, Name); + return make<MergeInputSection>(this, &Sec, Name); + return make<InputSection>(this, &Sec, Name); } template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { @@ -444,12 +487,11 @@ template <class ELFT> void elf::ObjectFile<ELFT>::initializeSymbols() { } template <class ELFT> -InputSectionBase<ELFT> * -elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { +InputSectionBase *elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { uint32_t Index = this->getSectionIndex(Sym); - if (Index >= Sections.size()) + if (Index >= this->Sections.size()) fatal(toString(this) + ": invalid section index: " + Twine(Index)); - InputSectionBase<ELFT> *S = Sections[Index]; + InputSectionBase *S = this->Sections[Index]; // We found that GNU assembler 2.17.50 [FreeBSD] 2007-07-03 could // generate broken objects. 
STT_SECTION/STT_NOTYPE symbols can be @@ -463,7 +505,7 @@ elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { fatal(toString(this) + ": invalid section index: " + Twine(Index)); } - if (S == &InputSection<ELFT>::Discarded) + if (S == &InputSection::Discarded) return S; return S->Repl; } @@ -471,30 +513,29 @@ elf::ObjectFile<ELFT>::getSection(const Elf_Sym &Sym) const { template <class ELFT> SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { int Binding = Sym->getBinding(); - InputSectionBase<ELFT> *Sec = getSection(*Sym); + InputSectionBase *Sec = getSection(*Sym); uint8_t StOther = Sym->st_other; uint8_t Type = Sym->getType(); - uintX_t Value = Sym->st_value; - uintX_t Size = Sym->st_size; + uint64_t Value = Sym->st_value; + uint64_t Size = Sym->st_size; if (Binding == STB_LOCAL) { if (Sym->getType() == STT_FILE) - SourceFile = check(Sym->getName(this->StringTable)); + SourceFile = check(Sym->getName(this->StringTable), toString(this)); if (this->StringTable.size() <= Sym->st_name) fatal(toString(this) + ": invalid symbol name offset"); StringRefZ Name = this->StringTable.data() + Sym->st_name; if (Sym->st_shndx == SHN_UNDEF) - return new (BAlloc) - Undefined<ELFT>(Name, /*IsLocal=*/true, StOther, Type, this); + return make<Undefined>(Name, /*IsLocal=*/true, StOther, Type, this); - return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther, - Type, Value, Size, Sec, this); + return make<DefinedRegular>(Name, /*IsLocal=*/true, StOther, Type, Value, + Size, Sec, this); } - StringRef Name = check(Sym->getName(this->StringTable)); + StringRef Name = check(Sym->getName(this->StringTable), toString(this)); switch (Sym->st_shndx) { case SHN_UNDEF: @@ -517,7 +558,7 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) { case STB_GLOBAL: case STB_WEAK: case STB_GNU_UNIQUE: - if (Sec == &InputSection<ELFT>::Discarded) + if (Sec == &InputSection::Discarded) return elf::Symtab<ELFT>::X ->addUndefined(Name, 
/*IsLocal=*/false, Binding, StOther, Type, /*CanOmitFromDynSym=*/false, this) @@ -533,27 +574,34 @@ template <class ELFT> void ArchiveFile::parse() { MB.getBufferIdentifier() + ": failed to parse archive"); // Read the symbol table to construct Lazy objects. - for (const Archive::Symbol &Sym : File->symbols()) + for (const Archive::Symbol &Sym : File->symbols()) { Symtab<ELFT>::X->addLazyArchive(this, Sym); + } + + if (File->symbols().begin() == File->symbols().end()) + Config->ArchiveWithoutSymbolsSeen = true; } // Returns a buffer pointing to a member file containing a given symbol. std::pair<MemoryBufferRef, uint64_t> ArchiveFile::getMember(const Archive::Symbol *Sym) { Archive::Child C = - check(Sym->getMember(), - "could not get the member for symbol " + Sym->getName()); + check(Sym->getMember(), toString(this) + + ": could not get the member for symbol " + + Sym->getName()); if (!Seen.insert(C.getChildOffset()).second) return {MemoryBufferRef(), 0}; MemoryBufferRef Ret = check(C.getMemoryBufferRef(), - "could not get the buffer for the member defining symbol " + + toString(this) + + ": could not get the buffer for the member defining symbol " + Sym->getName()); if (C.getParent()->isThin() && Tar) - Tar->append(relativeToRoot(check(C.getFullName())), Ret.getBuffer()); + Tar->append(relativeToRoot(check(C.getFullName(), toString(this))), + Ret.getBuffer()); if (C.getParent()->isThin()) return {Ret, 0}; return {Ret, C.getChildOffset()}; @@ -567,16 +615,24 @@ template <class ELFT> const typename ELFT::Shdr * SharedFile<ELFT>::getSection(const Elf_Sym &Sym) const { return check( - this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX)); + this->getObj().getSection(&Sym, this->Symbols, this->SymtabSHNDX), + toString(this)); +} + +template <class ELFT> StringRef SharedFile<ELFT>::getSoName() const { + if (SoName.empty()) + return this->DefaultSoName; + return SoName; } // Partially parse the shared object file so that we can call // getSoName on this 
object. template <class ELFT> void SharedFile<ELFT>::parseSoName() { const Elf_Shdr *DynamicSec = nullptr; - const ELFFile<ELFT> Obj = this->getObj(); - ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this)); + + // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. for (const Elf_Shdr &Sec : Sections) { switch (Sec.sh_type) { default: @@ -588,7 +644,8 @@ template <class ELFT> void SharedFile<ELFT>::parseSoName() { DynamicSec = &Sec; break; case SHT_SYMTAB_SHNDX: - this->SymtabSHNDX = check(Obj.getSHNDXTable(Sec, Sections)); + this->SymtabSHNDX = + check(Obj.getSHNDXTable(Sec, Sections), toString(this)); break; case SHT_GNU_versym: this->VersymSec = &Sec; @@ -602,20 +659,15 @@ template <class ELFT> void SharedFile<ELFT>::parseSoName() { if (this->VersymSec && this->Symbols.empty()) error("SHT_GNU_versym should be associated with symbol table"); - // DSOs are identified by soname, and they usually contain - // DT_SONAME tag in their header. But if they are missing, - // filenames are used as default sonames. - SoName = sys::path::filename(this->getName()); - + // Search for a DT_SONAME tag to initialize this->SoName. 
if (!DynamicSec) return; - ArrayRef<Elf_Dyn> Arr = check(Obj.template getSectionContentsAsArray<Elf_Dyn>(DynamicSec), - toString(this) + ": getSectionContentsAsArray failed"); + toString(this)); for (const Elf_Dyn &Dyn : Arr) { if (Dyn.d_tag == DT_SONAME) { - uintX_t Val = Dyn.getVal(); + uint64_t Val = Dyn.getVal(); if (Val >= this->StringTable.size()) fatal(toString(this) + ": invalid DT_SONAME entry"); SoName = StringRef(this->StringTable.data() + Val); @@ -681,7 +733,7 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() { bool Hidden = VersymIndex & VERSYM_HIDDEN; VersymIndex = VersymIndex & ~VERSYM_HIDDEN; - StringRef Name = check(Sym.getName(this->StringTable)); + StringRef Name = check(Sym.getName(this->StringTable), toString(this)); if (Sym.isUndefined()) { Undefs.push_back(Name); continue; @@ -707,19 +759,18 @@ template <class ELFT> void SharedFile<ELFT>::parseRest() { } } -static ELFKind getBitcodeELFKind(MemoryBufferRef MB) { - Triple T(check(getBitcodeTargetTriple(MB))); +static ELFKind getBitcodeELFKind(const Triple &T) { if (T.isLittleEndian()) return T.isArch64Bit() ? ELF64LEKind : ELF32LEKind; return T.isArch64Bit() ? 
ELF64BEKind : ELF32BEKind; } -static uint8_t getBitcodeMachineKind(MemoryBufferRef MB) { - Triple T(check(getBitcodeTargetTriple(MB))); +static uint8_t getBitcodeMachineKind(StringRef Path, const Triple &T) { switch (T.getArch()) { case Triple::aarch64: return EM_AARCH64; case Triple::arm: + case Triple::thumb: return EM_ARM; case Triple::mips: case Triple::mipsel: @@ -735,14 +786,32 @@ static uint8_t getBitcodeMachineKind(MemoryBufferRef MB) { case Triple::x86_64: return EM_X86_64; default: - fatal(MB.getBufferIdentifier() + - ": could not infer e_machine from bitcode target triple " + T.str()); + fatal(Path + ": could not infer e_machine from bitcode target triple " + + T.str()); } } -BitcodeFile::BitcodeFile(MemoryBufferRef MB) : InputFile(BitcodeKind, MB) { - EKind = getBitcodeELFKind(MB); - EMachine = getBitcodeMachineKind(MB); +BitcodeFile::BitcodeFile(MemoryBufferRef MB, StringRef ArchiveName, + uint64_t OffsetInArchive) + : InputFile(BitcodeKind, MB) { + this->ArchiveName = ArchiveName; + + // Here we pass a new MemoryBufferRef which is identified by ArchiveName + // (the fully resolved path of the archive) + member name + offset of the + // member in the archive. + // ThinLTO uses the MemoryBufferRef identifier to access its internal + // data structures and if two archives define two members with the same name, + // this causes a collision which result in only one of the objects being + // taken into consideration at LTO time (which very likely causes undefined + // symbols later in the link stage). 
+ MemoryBufferRef MBRef(MB.getBuffer(), + Saver.save(ArchiveName + MB.getBufferIdentifier() + + utostr(OffsetInArchive))); + Obj = check(lto::InputFile::create(MBRef), toString(this)); + + Triple T(Obj->getTargetTriple()); + EKind = getBitcodeELFKind(T); + EMachine = getBitcodeMachineKind(MB.getBufferIdentifier(), T); } static uint8_t mapVisibility(GlobalValue::VisibilityTypes GvVisibility) { @@ -762,25 +831,24 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, const lto::InputFile::Symbol &ObjSym, BitcodeFile *F) { StringRef NameRef = Saver.save(ObjSym.getName()); - uint32_t Flags = ObjSym.getFlags(); - uint32_t Binding = (Flags & BasicSymbolRef::SF_Weak) ? STB_WEAK : STB_GLOBAL; + uint32_t Binding = ObjSym.isWeak() ? STB_WEAK : STB_GLOBAL; uint8_t Type = ObjSym.isTLS() ? STT_TLS : STT_NOTYPE; uint8_t Visibility = mapVisibility(ObjSym.getVisibility()); bool CanOmitFromDynSym = ObjSym.canBeOmittedFromSymbolTable(); - int C = check(ObjSym.getComdatIndex()); + int C = ObjSym.getComdatIndex(); if (C != -1 && !KeptComdats[C]) return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, F); - if (Flags & BasicSymbolRef::SF_Undefined) + if (ObjSym.isUndefined()) return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding, Visibility, Type, CanOmitFromDynSym, F); - if (Flags & BasicSymbolRef::SF_Common) + if (ObjSym.isCommon()) return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(), ObjSym.getCommonAlignment(), Binding, Visibility, STT_OBJECT, F); @@ -791,24 +859,9 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats, template <class ELFT> void BitcodeFile::parse(DenseSet<CachedHashStringRef> &ComdatGroups) { - - // Here we pass a new MemoryBufferRef which is identified by ArchiveName - // (the fully resolved path of the archive) + member name + offset of the - // member in the archive. 
- // ThinLTO uses the MemoryBufferRef identifier to access its internal - // data structures and if two archives define two members with the same name, - // this causes a collision which result in only one of the objects being - // taken into consideration at LTO time (which very likely causes undefined - // symbols later in the link stage). - Obj = check(lto::InputFile::create(MemoryBufferRef( - MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier() + - utostr(OffsetInArchive))))); - std::vector<bool> KeptComdats; - for (StringRef S : Obj->getComdatTable()) { - StringRef N = Saver.save(S); - KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(N)).second); - } + for (StringRef S : Obj->getComdatTable()) + KeptComdats.push_back(ComdatGroups.insert(CachedHashStringRef(S)).second); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, this)); @@ -857,8 +910,8 @@ template <class ELFT> void BinaryFile::parse() { StringRef EndName = Saver.save(Twine(Filename) + "_end"); StringRef SizeName = Saver.save(Twine(Filename) + "_size"); - auto *Section = make<InputSection<ELFT>>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - 8, Data, ".data"); + auto *Section = + make<InputSection>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 8, Data, ".data"); Sections.push_back(Section); elf::Symtab<ELFT>::X->addRegular(StartName, STV_DEFAULT, STT_OBJECT, 0, 0, @@ -878,10 +931,10 @@ static bool isBitcode(MemoryBufferRef MB) { InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName, uint64_t OffsetInArchive) { - InputFile *F = - isBitcode(MB) ? make<BitcodeFile>(MB) : createELFFile<ObjectFile>(MB); + InputFile *F = isBitcode(MB) + ? 
make<BitcodeFile>(MB, ArchiveName, OffsetInArchive) + : createELFFile<ObjectFile>(MB); F->ArchiveName = ArchiveName; - F->OffsetInArchive = OffsetInArchive; return F; } @@ -907,27 +960,31 @@ template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() { typedef typename ELFT::SymRange Elf_Sym_Range; const ELFFile<ELFT> Obj(this->MB.getBuffer()); - ArrayRef<Elf_Shdr> Sections = check(Obj.sections()); + ArrayRef<Elf_Shdr> Sections = check(Obj.sections(), toString(this)); for (const Elf_Shdr &Sec : Sections) { if (Sec.sh_type != SHT_SYMTAB) continue; - Elf_Sym_Range Syms = check(Obj.symbols(&Sec)); + + Elf_Sym_Range Syms = check(Obj.symbols(&Sec), toString(this)); uint32_t FirstNonLocal = Sec.sh_info; - StringRef StringTable = check(Obj.getStringTableForSymtab(Sec, Sections)); + StringRef StringTable = + check(Obj.getStringTableForSymtab(Sec, Sections), toString(this)); std::vector<StringRef> V; + for (const Elf_Sym &Sym : Syms.slice(FirstNonLocal)) if (Sym.st_shndx != SHN_UNDEF) - V.push_back(check(Sym.getName(StringTable))); + V.push_back(check(Sym.getName(StringTable), toString(this))); return V; } return {}; } std::vector<StringRef> LazyObjectFile::getBitcodeSymbols() { - std::unique_ptr<lto::InputFile> Obj = check(lto::InputFile::create(this->MB)); + std::unique_ptr<lto::InputFile> Obj = + check(lto::InputFile::create(this->MB), toString(this)); std::vector<StringRef> V; for (const lto::InputFile::Symbol &Sym : Obj->symbols()) - if (!(Sym.getFlags() & BasicSymbolRef::SF_Undefined)) + if (!Sym.isUndefined()) V.push_back(Saver.save(Sym.getName())); return V; } diff --git a/ELF/InputFiles.h b/ELF/InputFiles.h index 95888061d877..40a8b23c5ef4 100644 --- a/ELF/InputFiles.h +++ b/ELF/InputFiles.h @@ -30,6 +30,7 @@ namespace llvm { class DWARFDebugLine; class TarWriter; +struct DILineInfo; namespace lto { class InputFile; } @@ -74,25 +75,34 @@ public: StringRef getName() const { return MB.getBufferIdentifier(); } MemoryBufferRef MB; + // Returns 
sections. It is a runtime error to call this function + // on files that don't have the notion of sections. + ArrayRef<InputSectionBase *> getSections() const { + assert(FileKind == ObjectKind || FileKind == BinaryKind); + return Sections; + } + // Filename of .a which contained this file. If this file was // not in an archive file, it is the empty string. We use this // string for creating error messages. StringRef ArchiveName; - // If this file is in an archive, the member contains the offset of - // the file in the archive. Otherwise, it's just zero. We store this - // field so that we can pass it to lib/LTO in order to disambiguate - // between objects. - uint64_t OffsetInArchive; - // If this is an architecture-specific file, the following members // have ELF type (i.e. ELF{32,64}{LE,BE}) and target machine type. ELFKind EKind = ELFNoneKind; uint16_t EMachine = llvm::ELF::EM_NONE; uint8_t OSABI = 0; + // For SharedKind inputs, the string to use in DT_NEEDED when the library + // has no soname. + std::string DefaultSoName; + + // Cache for toString(). Only toString() should use this member. 
+ mutable std::string ToStringCache; + protected: - InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + InputFile(Kind K, MemoryBufferRef M); + std::vector<InputSectionBase *> Sections; private: const Kind FileKind; @@ -136,9 +146,7 @@ template <class ELFT> class ObjectFile : public ELFFileBase<ELFT> { typedef typename ELFT::Rela Elf_Rela; typedef typename ELFT::Sym Elf_Sym; typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::SymRange Elf_Sym_Range; typedef typename ELFT::Word Elf_Word; - typedef typename ELFT::uint uintX_t; StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections, const Elf_Shdr &Sec); @@ -151,13 +159,11 @@ public: ArrayRef<SymbolBody *> getSymbols(); ArrayRef<SymbolBody *> getLocalSymbols(); - ArrayRef<SymbolBody *> getNonLocalSymbols(); explicit ObjectFile(MemoryBufferRef M); void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); - ArrayRef<InputSectionBase<ELFT> *> getSections() const { return Sections; } - InputSectionBase<ELFT> *getSection(const Elf_Sym &Sym) const; + InputSectionBase *getSection(const Elf_Sym &Sym) const; SymbolBody &getSymbolBody(uint32_t SymbolIndex) const { if (SymbolIndex >= SymbolBodies.size()) @@ -167,13 +173,14 @@ public: template <typename RelT> SymbolBody &getRelocTargetSym(const RelT &Rel) const { - uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL); + uint32_t SymIndex = Rel.getSymbol(Config->IsMips64EL); return getSymbolBody(SymIndex); } // Returns source line information for a given offset. // If no information is available, returns "". - std::string getLineInfo(InputSectionBase<ELFT> *S, uintX_t Offset); + std::string getLineInfo(InputSectionBase *S, uint64_t Offset); + llvm::Optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); // MIPS GP0 value defined by this file. 
This value represents the gp value // used to create the relocatable object and required to support @@ -190,16 +197,13 @@ private: initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); void initializeSymbols(); void initializeDwarfLine(); - InputSectionBase<ELFT> *getRelocTarget(const Elf_Shdr &Sec); - InputSectionBase<ELFT> *createInputSection(const Elf_Shdr &Sec, - StringRef SectionStringTable); + InputSectionBase *getRelocTarget(const Elf_Shdr &Sec); + InputSectionBase *createInputSection(const Elf_Shdr &Sec, + StringRef SectionStringTable); bool shouldMerge(const Elf_Shdr &Sec); SymbolBody *createSymbolBody(const Elf_Sym *Sym); - // List of all sections defined by this file. - std::vector<InputSectionBase<ELFT> *> Sections; - // List of all symbols referenced or defined by this file. std::vector<SymbolBody *> SymbolBodies; @@ -256,7 +260,8 @@ private: class BitcodeFile : public InputFile { public: - explicit BitcodeFile(MemoryBufferRef M); + BitcodeFile(MemoryBufferRef M, StringRef ArchiveName, + uint64_t OffsetInArchive); static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } template <class ELFT> void parse(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups); @@ -276,8 +281,6 @@ template <class ELFT> class SharedFile : public ELFFileBase<ELFT> { typedef typename ELFT::SymRange Elf_Sym_Range; typedef typename ELFT::Verdef Elf_Verdef; typedef typename ELFT::Versym Elf_Versym; - typedef typename ELFT::Word Elf_Word; - typedef typename ELFT::uint uintX_t; std::vector<StringRef> Undefs; StringRef SoName; @@ -285,7 +288,7 @@ template <class ELFT> class SharedFile : public ELFFileBase<ELFT> { const Elf_Shdr *VerdefSec = nullptr; public: - StringRef getSoName() const { return SoName; } + StringRef getSoName() const; const Elf_Shdr *getSection(const Elf_Sym &Sym) const; llvm::ArrayRef<StringRef> getUndefinedSymbols() { return Undefs; } @@ -322,10 +325,6 @@ public: explicit BinaryFile(MemoryBufferRef M) : 
InputFile(BinaryKind, M) {} static bool classof(const InputFile *F) { return F->kind() == BinaryKind; } template <class ELFT> void parse(); - ArrayRef<InputSectionData *> getSections() const { return Sections; } - -private: - std::vector<InputSectionData *> Sections; }; InputFile *createObjectFile(MemoryBufferRef MB, StringRef ArchiveName = "", diff --git a/ELF/InputSection.cpp b/ELF/InputSection.cpp index 6b1e92891b98..aff57551a8b3 100644 --- a/ELF/InputSection.cpp +++ b/ELF/InputSection.cpp @@ -22,6 +22,7 @@ #include "llvm/Object/Decompressor.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Path.h" #include <mutex> using namespace llvm; @@ -29,16 +30,17 @@ using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; +using namespace llvm::sys; using namespace lld; using namespace lld::elf; +std::vector<InputSectionBase *> elf::InputSections; + // Returns a string to construct an error message. -template <class ELFT> -std::string lld::toString(const InputSectionBase<ELFT> *Sec) { +std::string lld::toString(const InputSectionBase *Sec) { // File can be absent if section is synthetic. - std::string FileName = - Sec->getFile() ? Sec->getFile()->getName() : "<internal>"; + std::string FileName = Sec->File ? 
Sec->File->getName() : "<internal>"; return (FileName + ":(" + Sec->Name + ")").str(); } @@ -50,91 +52,118 @@ static ArrayRef<uint8_t> getSectionContents(elf::ObjectFile<ELFT> *File, return check(File->getObj().getSectionContents(Hdr)); } -template <class ELFT> -InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, - uintX_t Flags, uint32_t Type, - uintX_t Entsize, uint32_t Link, - uint32_t Info, uintX_t Addralign, - ArrayRef<uint8_t> Data, StringRef Name, - Kind SectionKind) - : InputSectionData(SectionKind, Name, Data, - !Config->GcSections || !(Flags & SHF_ALLOC)), - File(File), Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), - Info(Info), Repl(this) { +InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags, + uint32_t Type, uint64_t Entsize, + uint32_t Link, uint32_t Info, + uint32_t Alignment, ArrayRef<uint8_t> Data, + StringRef Name, Kind SectionKind) + : SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info, + Link), + File(File), Data(Data), Repl(this) { + Live = !Config->GcSections || !(Flags & SHF_ALLOC); + Assigned = false; NumRelocations = 0; AreRelocsRela = false; // The ELF spec states that a value of 0 means the section has // no alignment constraits. - uint64_t V = std::max<uint64_t>(Addralign, 1); + uint32_t V = std::max<uint64_t>(Alignment, 1); if (!isPowerOf2_64(V)) fatal(toString(File) + ": section sh_addralign is not a power of 2"); + this->Alignment = V; +} +// GNU assembler 2.24 and LLVM 4.0.0's MC (the newest release as of +// March 2017) fail to infer section types for sections starting with +// ".init_array." or ".fini_array.". They set SHT_PROGBITS instead of +// SHF_INIT_ARRAY. As a result, the following assembler directive +// creates ".init_array.100" with SHT_PROGBITS, for example. +// +// .section .init_array.100, "aw" +// +// This function forces SHT_{INIT,FINI}_ARRAY so that we can handle +// incorrect inputs as if they were correct from the beginning. 
+static uint64_t getType(uint64_t Type, StringRef Name) { + if (Type == SHT_PROGBITS && Name.startswith(".init_array.")) + return SHT_INIT_ARRAY; + if (Type == SHT_PROGBITS && Name.startswith(".fini_array.")) + return SHT_FINI_ARRAY; + return Type; +} + +template <class ELFT> +InputSectionBase::InputSectionBase(elf::ObjectFile<ELFT> *File, + const typename ELFT::Shdr *Hdr, + StringRef Name, Kind SectionKind) + : InputSectionBase(File, Hdr->sh_flags & ~SHF_INFO_LINK, + getType(Hdr->sh_type, Name), Hdr->sh_entsize, + Hdr->sh_link, Hdr->sh_info, Hdr->sh_addralign, + getSectionContents(File, Hdr), Name, SectionKind) { // We reject object files having insanely large alignments even though // they are allowed by the spec. I think 4GB is a reasonable limitation. // We might want to relax this in the future. - if (V > UINT32_MAX) + if (Hdr->sh_addralign > UINT32_MAX) fatal(toString(File) + ": section sh_addralign is too large"); - Alignment = V; - - // If it is not a mergeable section, overwrite the flag so that the flag - // is consistent with the class. This inconsistency could occur when - // string merging is disabled using -O0 flag. 
- if (!Config->Relocatable && !isa<MergeInputSection<ELFT>>(this)) - this->Flags &= ~(SHF_MERGE | SHF_STRINGS); } -template <class ELFT> -InputSectionBase<ELFT>::InputSectionBase(elf::ObjectFile<ELFT> *File, - const Elf_Shdr *Hdr, StringRef Name, - Kind SectionKind) - : InputSectionBase(File, Hdr->sh_flags & ~SHF_INFO_LINK, Hdr->sh_type, - Hdr->sh_entsize, Hdr->sh_link, Hdr->sh_info, - Hdr->sh_addralign, getSectionContents(File, Hdr), Name, - SectionKind) { - this->Offset = Hdr->sh_offset; -} - -template <class ELFT> size_t InputSectionBase<ELFT>::getSize() const { - if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) +size_t InputSectionBase::getSize() const { + if (auto *S = dyn_cast<SyntheticSection>(this)) return S->getSize(); - if (auto *D = dyn_cast<InputSection<ELFT>>(this)) - if (D->getThunksSize() > 0) - return D->getThunkOff() + D->getThunksSize(); - return Data.size(); } -template <class ELFT> -typename ELFT::uint InputSectionBase<ELFT>::getOffset(uintX_t Offset) const { +uint64_t InputSectionBase::getOffsetInFile() const { + const uint8_t *FileStart = (const uint8_t *)File->MB.getBufferStart(); + const uint8_t *SecStart = Data.begin(); + return SecStart - FileStart; +} + +uint64_t SectionBase::getOffset(uint64_t Offset) const { switch (kind()) { + case Output: { + auto *OS = cast<OutputSection>(this); + // For output sections we treat offset -1 as the end of the section. + return Offset == uint64_t(-1) ? OS->Size : Offset; + } case Regular: - return cast<InputSection<ELFT>>(this)->OutSecOff + Offset; - case Synthetic: + return cast<InputSection>(this)->OutSecOff + Offset; + case Synthetic: { + auto *IS = cast<InputSection>(this); // For synthetic sections we treat offset -1 as the end of the section. - // The same approach is used for synthetic symbols (DefinedSynthetic). - return cast<InputSection<ELFT>>(this)->OutSecOff + - (Offset == uintX_t(-1) ? getSize() : Offset); + return IS->OutSecOff + (Offset == uint64_t(-1) ? 
IS->getSize() : Offset); + } case EHFrame: // The file crtbeginT.o has relocations pointing to the start of an empty // .eh_frame that is known to be the first in the link. It does that to // identify the start of the output .eh_frame. return Offset; case Merge: - return cast<MergeInputSection<ELFT>>(this)->getOffset(Offset); + const MergeInputSection *MS = cast<MergeInputSection>(this); + if (MS->MergeSec) + return MS->MergeSec->OutSecOff + MS->getOffset(Offset); + return MS->getOffset(Offset); } llvm_unreachable("invalid section kind"); } +OutputSection *SectionBase::getOutputSection() { + if (auto *IS = dyn_cast<InputSection>(this)) + return IS->OutSec; + if (auto *MS = dyn_cast<MergeInputSection>(this)) + return MS->MergeSec ? MS->MergeSec->OutSec : nullptr; + if (auto *EH = dyn_cast<EhInputSection>(this)) + return EH->EHSec->OutSec; + return cast<OutputSection>(this); +} + // Uncompress section contents. Note that this function is called // from parallel_for_each, so it must be thread-safe. 
-template <class ELFT> void InputSectionBase<ELFT>::uncompress() { - Decompressor Decompressor = check(Decompressor::create( - Name, toStringRef(Data), ELFT::TargetEndianness == llvm::support::little, - ELFT::Is64Bits)); +void InputSectionBase::uncompress() { + Decompressor Dec = check(Decompressor::create(Name, toStringRef(Data), + Config->IsLE, Config->Is64)); - size_t Size = Decompressor.getDecompressedSize(); + size_t Size = Dec.getDecompressedSize(); char *OutputBuf; { static std::mutex Mu; @@ -142,41 +171,44 @@ template <class ELFT> void InputSectionBase<ELFT>::uncompress() { OutputBuf = BAlloc.Allocate<char>(Size); } - if (Error E = Decompressor.decompress({OutputBuf, Size})) - fatal(E, toString(this)); + if (Error E = Dec.decompress({OutputBuf, Size})) + fatal(toString(this) + + ": decompress failed: " + llvm::toString(std::move(E))); Data = ArrayRef<uint8_t>((uint8_t *)OutputBuf, Size); } -template <class ELFT> -typename ELFT::uint -InputSectionBase<ELFT>::getOffset(const DefinedRegular<ELFT> &Sym) const { +uint64_t SectionBase::getOffset(const DefinedRegular &Sym) const { return getOffset(Sym.Value); } -template <class ELFT> -InputSectionBase<ELFT> *InputSectionBase<ELFT>::getLinkOrderDep() const { +InputSectionBase *InputSectionBase::getLinkOrderDep() const { if ((Flags & SHF_LINK_ORDER) && Link != 0) - return getFile()->getSections()[Link]; + return File->getSections()[Link]; return nullptr; } // Returns a source location string. Used to construct an error message. template <class ELFT> -std::string InputSectionBase<ELFT>::getLocation(typename ELFT::uint Offset) { +std::string InputSectionBase::getLocation(uint64_t Offset) { + // We don't have file for synthetic sections. + if (getFile<ELFT>() == nullptr) + return (Config->OutputFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")") + .str(); + // First check if we can get desired values from debugging information. 
- std::string LineInfo = File->getLineInfo(this, Offset); + std::string LineInfo = getFile<ELFT>()->getLineInfo(this, Offset); if (!LineInfo.empty()) return LineInfo; // File->SourceFile contains STT_FILE symbol that contains a // source file name. If it's missing, we use an object file name. - std::string SrcFile = File->SourceFile; + std::string SrcFile = getFile<ELFT>()->SourceFile; if (SrcFile.empty()) SrcFile = toString(File); // Find a function symbol that encloses a given location. - for (SymbolBody *B : File->getSymbols()) - if (auto *D = dyn_cast<DefinedRegular<ELFT>>(B)) + for (SymbolBody *B : getFile<ELFT>()->getSymbols()) + if (auto *D = dyn_cast<DefinedRegular>(B)) if (D->Section == this && D->Type == STT_FUNC) if (D->Value <= Offset && Offset < D->Value + D->Size) return SrcFile + ":(function " + toString(*D) + ")"; @@ -185,69 +217,144 @@ std::string InputSectionBase<ELFT>::getLocation(typename ELFT::uint Offset) { return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str(); } -template <class ELFT> -InputSection<ELFT>::InputSection() : InputSectionBase<ELFT>() {} +// Returns a source location string. This function is intended to be +// used for constructing an error message. The returned message looks +// like this: +// +// foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42) +// +// Returns an empty string if there's no way to get line info. +template <class ELFT> std::string InputSectionBase::getSrcMsg(uint64_t Offset) { + // Synthetic sections don't have input files. + elf::ObjectFile<ELFT> *File = getFile<ELFT>(); + if (!File) + return ""; + + Optional<DILineInfo> Info = File->getDILineInfo(this, Offset); + + // File->SourceFile contains STT_FILE symbol, and that is a last resort. 
+ if (!Info) + return File->SourceFile; + + std::string Path = Info->FileName; + std::string Filename = path::filename(Path); + std::string Lineno = ":" + std::to_string(Info->Line); + if (Filename == Path) + return Filename + Lineno; + return Filename + Lineno + " (" + Path + Lineno + ")"; +} + +// Returns a filename string along with an optional section name. This +// function is intended to be used for constructing an error +// message. The returned message looks like this: +// +// path/to/foo.o:(function bar) +// +// or +// +// path/to/foo.o:(function bar) in archive path/to/bar.a +template <class ELFT> std::string InputSectionBase::getObjMsg(uint64_t Off) { + // Synthetic sections don't have input files. + elf::ObjectFile<ELFT> *File = getFile<ELFT>(); + std::string Filename = File ? File->getName() : "(internal)"; + + std::string Archive; + if (!File->ArchiveName.empty()) + Archive = (" in archive " + File->ArchiveName).str(); + + // Find a symbol that encloses a given location. + for (SymbolBody *B : getFile<ELFT>()->getSymbols()) + if (auto *D = dyn_cast<DefinedRegular>(B)) + if (D->Section == this && D->Value <= Off && Off < D->Value + D->Size) + return Filename + ":(" + toString(*D) + ")" + Archive; -template <class ELFT> -InputSection<ELFT>::InputSection(uintX_t Flags, uint32_t Type, - uintX_t Addralign, ArrayRef<uint8_t> Data, - StringRef Name, Kind K) - : InputSectionBase<ELFT>(nullptr, Flags, Type, - /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Addralign, - Data, Name, K) {} + // If there's no symbol, print out the offset in the section. 
+ return (Filename + ":(" + Name + "+0x" + utohexstr(Off) + ")" + Archive) + .str(); +} -template <class ELFT> -InputSection<ELFT>::InputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header, StringRef Name) - : InputSectionBase<ELFT>(F, Header, Name, Base::Regular) {} +InputSectionBase InputSectionBase::Discarded; -template <class ELFT> -bool InputSection<ELFT>::classof(const InputSectionData *S) { - return S->kind() == Base::Regular || S->kind() == Base::Synthetic; -} +InputSection::InputSection(uint64_t Flags, uint32_t Type, uint32_t Alignment, + ArrayRef<uint8_t> Data, StringRef Name, Kind K) + : InputSectionBase(nullptr, Flags, Type, + /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Alignment, Data, + Name, K) {} template <class ELFT> -InputSectionBase<ELFT> *InputSection<ELFT>::getRelocatedSection() { - assert(this->Type == SHT_RELA || this->Type == SHT_REL); - ArrayRef<InputSectionBase<ELFT> *> Sections = this->File->getSections(); - return Sections[this->Info]; -} +InputSection::InputSection(elf::ObjectFile<ELFT> *F, + const typename ELFT::Shdr *Header, StringRef Name) + : InputSectionBase(F, Header, Name, InputSectionBase::Regular) {} -template <class ELFT> void InputSection<ELFT>::addThunk(const Thunk<ELFT> *T) { - Thunks.push_back(T); +bool InputSection::classof(const SectionBase *S) { + return S->kind() == SectionBase::Regular || + S->kind() == SectionBase::Synthetic; } -template <class ELFT> uint64_t InputSection<ELFT>::getThunkOff() const { - return this->Data.size(); +bool InputSectionBase::classof(const SectionBase *S) { + return S->kind() != Output; } -template <class ELFT> uint64_t InputSection<ELFT>::getThunksSize() const { - uint64_t Total = 0; - for (const Thunk<ELFT> *T : Thunks) - Total += T->size(); - return Total; +InputSectionBase *InputSection::getRelocatedSection() { + assert(this->Type == SHT_RELA || this->Type == SHT_REL); + ArrayRef<InputSectionBase *> Sections = this->File->getSections(); + return Sections[this->Info]; } -// This is used 
for -r. We can't use memcpy to copy relocations because we need -// to update symbol table offset and section index for each relocation. So we -// copy relocations one by one. -template <class ELFT> -template <class RelTy> -void InputSection<ELFT>::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { - InputSectionBase<ELFT> *RelocatedSection = getRelocatedSection(); +// This is used for -r and --emit-relocs. We can't use memcpy to copy +// relocations because we need to update symbol table offset and section index +// for each relocation. So we copy relocations one by one. +template <class ELFT, class RelTy> +void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) { + InputSectionBase *RelocatedSection = getRelocatedSection(); + // Loop is slow and have complexity O(N*M), where N - amount of + // relocations and M - amount of symbols in symbol table. + // That happens because getSymbolIndex(...) call below performs + // simple linear search. for (const RelTy &Rel : Rels) { - uint32_t Type = Rel.getType(Config->Mips64EL); - SymbolBody &Body = this->File->getRelocTargetSym(Rel); + uint32_t Type = Rel.getType(Config->IsMips64EL); + SymbolBody &Body = this->getFile<ELFT>()->getRelocTargetSym(Rel); - Elf_Rela *P = reinterpret_cast<Elf_Rela *>(Buf); + auto *P = reinterpret_cast<typename ELFT::Rela *>(Buf); Buf += sizeof(RelTy); - if (Config->Rela) + if (Config->IsRela) P->r_addend = getAddend<ELFT>(Rel); - P->r_offset = RelocatedSection->getOffset(Rel.r_offset); + + // Output section VA is zero for -r, so r_offset is an offset within the + // section, but for --emit-relocs it is an virtual address. + P->r_offset = RelocatedSection->OutSec->Addr + + RelocatedSection->getOffset(Rel.r_offset); P->setSymbolAndType(In<ELFT>::SymTab->getSymbolIndex(&Body), Type, - Config->Mips64EL); + Config->IsMips64EL); + + if (Body.Type == STT_SECTION) { + // We combine multiple section symbols into only one per + // section. This means we have to update the addend. 
That is + // trivial for Elf_Rela, but for Elf_Rel we have to write to the + // section data. We do that by adding to the Relocation vector. + + // .eh_frame is horribly special and can reference discarded sections. To + // avoid having to parse and recreate .eh_frame, we just replace any + // relocation in it pointing to discarded sections with R_*_NONE, which + // hopefully creates a frame that is ignored at runtime. + SectionBase *Section = cast<DefinedRegular>(Body).Section; + if (Section == &InputSection::Discarded) { + P->setSymbolAndType(0, 0, false); + continue; + } + + if (Config->IsRela) { + P->r_addend += Body.getVA() - Section->getOutputSection()->Addr; + } else if (Config->Relocatable) { + const uint8_t *BufLoc = RelocatedSection->Data.begin() + Rel.r_offset; + RelocatedSection->Relocations.push_back( + {R_ABS, Type, Rel.r_offset, Target->getImplicitAddend(BufLoc, Type), + &Body}); + } + } + } } @@ -287,85 +394,52 @@ static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A, template <class ELFT> static typename ELFT::uint -getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, +getRelocTargetVA(uint32_t Type, int64_t A, typename ELFT::uint P, const SymbolBody &Body, RelExpr Expr) { switch (Expr) { - case R_HINT: - case R_TLSDESC_CALL: - llvm_unreachable("cannot relocate hint relocs"); - case R_TLSLD: - return In<ELFT>::Got->getTlsIndexOff() + A - In<ELFT>::Got->getSize(); - case R_TLSLD_PC: - return In<ELFT>::Got->getTlsIndexVA() + A - P; - case R_THUNK_ABS: - return Body.getThunkVA<ELFT>() + A; - case R_THUNK_PC: - case R_THUNK_PLT_PC: - return Body.getThunkVA<ELFT>() + A - P; - case R_PPC_TOC: - return getPPC64TocBase() + A; - case R_TLSGD: - return In<ELFT>::Got->getGlobalDynOffset(Body) + A - - In<ELFT>::Got->getSize(); - case R_TLSGD_PC: - return In<ELFT>::Got->getGlobalDynAddr(Body) + A - P; - case R_TLSDESC: - return In<ELFT>::Got->getGlobalDynAddr(Body) + A; - case R_TLSDESC_PAGE: - return 
getAArch64Page(In<ELFT>::Got->getGlobalDynAddr(Body) + A) - - getAArch64Page(P); - case R_PLT: - return Body.getPltVA<ELFT>() + A; - case R_PLT_PC: - case R_PPC_PLT_OPD: - return Body.getPltVA<ELFT>() + A - P; - case R_SIZE: - return Body.getSize<ELFT>() + A; - case R_GOTREL: - return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA(); - case R_GOTREL_FROM_END: - return Body.getVA<ELFT>(A) - In<ELFT>::Got->getVA() - - In<ELFT>::Got->getSize(); - case R_RELAX_TLS_GD_TO_IE_END: - case R_GOT_FROM_END: - return Body.getGotOffset<ELFT>() + A - In<ELFT>::Got->getSize(); - case R_RELAX_TLS_GD_TO_IE_ABS: + case R_ABS: + case R_RELAX_GOT_PC_NOPIC: + return Body.getVA(A); case R_GOT: + case R_RELAX_TLS_GD_TO_IE_ABS: return Body.getGotVA<ELFT>() + A; - case R_RELAX_TLS_GD_TO_IE_PAGE_PC: - case R_GOT_PAGE_PC: - return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P); - case R_RELAX_TLS_GD_TO_IE: - case R_GOT_PC: - return Body.getGotVA<ELFT>() + A - P; case R_GOTONLY_PC: return In<ELFT>::Got->getVA() + A - P; case R_GOTONLY_PC_FROM_END: return In<ELFT>::Got->getVA() + A - P + In<ELFT>::Got->getSize(); - case R_RELAX_TLS_LD_TO_LE: - case R_RELAX_TLS_IE_TO_LE: - case R_RELAX_TLS_GD_TO_LE: - case R_TLS: - // A weak undefined TLS symbol resolves to the base of the TLS - // block, i.e. gets a value of zero. If we pass --gc-sections to - // lld and .tbss is not referenced, it gets reclaimed and we don't - // create a TLS program header. Therefore, we resolve this - // statically to zero. 
- if (Body.isTls() && (Body.isLazy() || Body.isUndefined()) && - Body.symbol()->isWeak()) - return 0; - if (Target->TcbSize) - return Body.getVA<ELFT>(A) + - alignTo(Target->TcbSize, Out<ELFT>::TlsPhdr->p_align); - return Body.getVA<ELFT>(A) - Out<ELFT>::TlsPhdr->p_memsz; - case R_RELAX_TLS_GD_TO_LE_NEG: - case R_NEG_TLS: - return Out<ELF32LE>::TlsPhdr->p_memsz - Body.getVA<ELFT>(A); - case R_ABS: - case R_RELAX_GOT_PC_NOPIC: - return Body.getVA<ELFT>(A); + case R_GOTREL: + return Body.getVA(A) - In<ELFT>::Got->getVA(); + case R_GOTREL_FROM_END: + return Body.getVA(A) - In<ELFT>::Got->getVA() - In<ELFT>::Got->getSize(); + case R_GOT_FROM_END: + case R_RELAX_TLS_GD_TO_IE_END: + return Body.getGotOffset() + A - In<ELFT>::Got->getSize(); case R_GOT_OFF: - return Body.getGotOffset<ELFT>() + A; + return Body.getGotOffset() + A; + case R_GOT_PAGE_PC: + case R_RELAX_TLS_GD_TO_IE_PAGE_PC: + return getAArch64Page(Body.getGotVA<ELFT>() + A) - getAArch64Page(P); + case R_GOT_PC: + case R_RELAX_TLS_GD_TO_IE: + return Body.getGotVA<ELFT>() + A - P; + case R_HINT: + case R_NONE: + case R_TLSDESC_CALL: + llvm_unreachable("cannot relocate hint relocs"); + case R_MIPS_GOTREL: + return Body.getVA(A) - In<ELFT>::MipsGot->getGp(); + case R_MIPS_GOT_GP: + return In<ELFT>::MipsGot->getGp() + A; + case R_MIPS_GOT_GP_PC: { + // R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iif the target + // is _gp_disp symbol. In that case we should use the following + // formula for calculation "AHL + GP - P + 4". For details see p. 4-19 at + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + uint64_t V = In<ELFT>::MipsGot->getGp() + A - P; + if (Type == R_MIPS_LO16) + V += 4; + return V; + } case R_MIPS_GOT_LOCAL_PAGE: // If relocation against MIPS local symbol requires GOT entry, this entry // should be initialized by 'page address'. 
This address is high 16-bits @@ -381,8 +455,6 @@ getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getBodyEntryOffset(Body, A) - In<ELFT>::MipsGot->getGp(); - case R_MIPS_GOTREL: - return Body.getVA<ELFT>(A) - In<ELFT>::MipsGot->getGp(); case R_MIPS_TLSGD: return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + In<ELFT>::MipsGot->getGlobalDynOffset(Body) - @@ -390,40 +462,82 @@ getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, case R_MIPS_TLSLD: return In<ELFT>::MipsGot->getVA() + In<ELFT>::MipsGot->getTlsOffset() + In<ELFT>::MipsGot->getTlsIndexOff() - In<ELFT>::MipsGot->getGp(); + case R_PAGE_PC: + case R_PLT_PAGE_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) + return getAArch64Page(A); + return getAArch64Page(Body.getVA(A)) - getAArch64Page(P); + case R_PC: + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) { + // On ARM and AArch64 a branch to an undefined weak resolves to the + // next instruction, otherwise the place. + if (Config->EMachine == EM_ARM) + return getARMUndefinedRelativeWeakVA(Type, A, P); + if (Config->EMachine == EM_AARCH64) + return getAArch64UndefinedRelativeWeakVA(Type, A, P); + } + return Body.getVA(A) - P; + case R_PLT: + return Body.getPltVA() + A; + case R_PLT_PC: + case R_PPC_PLT_OPD: + return Body.getPltVA() + A - P; case R_PPC_OPD: { - uint64_t SymVA = Body.getVA<ELFT>(A); + uint64_t SymVA = Body.getVA(A); // If we have an undefined weak symbol, we might get here with a symbol // address of zero. That could overflow, but the code must be unreachable, // so don't bother doing anything at all. if (!SymVA) return 0; - if (Out<ELF64BE>::Opd) { + if (Out::Opd) { // If this is a local call, and we currently have the address of a // function-descriptor, get the underlying code address instead. 
- uint64_t OpdStart = Out<ELF64BE>::Opd->Addr; - uint64_t OpdEnd = OpdStart + Out<ELF64BE>::Opd->Size; + uint64_t OpdStart = Out::Opd->Addr; + uint64_t OpdEnd = OpdStart + Out::Opd->Size; bool InOpd = OpdStart <= SymVA && SymVA < OpdEnd; if (InOpd) - SymVA = read64be(&Out<ELF64BE>::OpdBuf[SymVA - OpdStart]); + SymVA = read64be(&Out::OpdBuf[SymVA - OpdStart]); } return SymVA - P; } - case R_PC: - if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) { - // On ARM and AArch64 a branch to an undefined weak resolves to the - // next instruction, otherwise the place. - if (Config->EMachine == EM_ARM) - return getARMUndefinedRelativeWeakVA(Type, A, P); - if (Config->EMachine == EM_AARCH64) - return getAArch64UndefinedRelativeWeakVA(Type, A, P); - } + case R_PPC_TOC: + return getPPC64TocBase() + A; case R_RELAX_GOT_PC: - return Body.getVA<ELFT>(A) - P; - case R_PLT_PAGE_PC: - case R_PAGE_PC: - if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) - return getAArch64Page(A); - return getAArch64Page(Body.getVA<ELFT>(A)) - getAArch64Page(P); + return Body.getVA(A) - P; + case R_RELAX_TLS_GD_TO_LE: + case R_RELAX_TLS_IE_TO_LE: + case R_RELAX_TLS_LD_TO_LE: + case R_TLS: + // A weak undefined TLS symbol resolves to the base of the TLS + // block, i.e. gets a value of zero. If we pass --gc-sections to + // lld and .tbss is not referenced, it gets reclaimed and we don't + // create a TLS program header. Therefore, we resolve this + // statically to zero. 
+ if (Body.isTls() && (Body.isLazy() || Body.isUndefined()) && + Body.symbol()->isWeak()) + return 0; + if (Target->TcbSize) + return Body.getVA(A) + alignTo(Target->TcbSize, Out::TlsPhdr->p_align); + return Body.getVA(A) - Out::TlsPhdr->p_memsz; + case R_RELAX_TLS_GD_TO_LE_NEG: + case R_NEG_TLS: + return Out::TlsPhdr->p_memsz - Body.getVA(A); + case R_SIZE: + return Body.getSize<ELFT>() + A; + case R_TLSDESC: + return In<ELFT>::Got->getGlobalDynAddr(Body) + A; + case R_TLSDESC_PAGE: + return getAArch64Page(In<ELFT>::Got->getGlobalDynAddr(Body) + A) - + getAArch64Page(P); + case R_TLSGD: + return In<ELFT>::Got->getGlobalDynOffset(Body) + A - + In<ELFT>::Got->getSize(); + case R_TLSGD_PC: + return In<ELFT>::Got->getGlobalDynAddr(Body) + A - P; + case R_TLSLD: + return In<ELFT>::Got->getTlsIndexOff() + A - In<ELFT>::Got->getSize(); + case R_TLSLD_PC: + return In<ELFT>::Got->getTlsIndexVA() + A - P; } llvm_unreachable("Invalid expression"); } @@ -435,57 +549,62 @@ getRelocTargetVA(uint32_t Type, typename ELFT::uint A, typename ELFT::uint P, // treatement such as GOT or PLT (because at runtime no one refers them). // So, we handle relocations for non-alloc sections directly in this // function as a performance optimization. 
-template <class ELFT> -template <class RelTy> -void InputSection<ELFT>::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { +template <class ELFT, class RelTy> +void InputSection::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) { for (const RelTy &Rel : Rels) { - uint32_t Type = Rel.getType(Config->Mips64EL); - uintX_t Offset = this->getOffset(Rel.r_offset); + uint32_t Type = Rel.getType(Config->IsMips64EL); + uint64_t Offset = getOffset(Rel.r_offset); uint8_t *BufLoc = Buf + Offset; - uintX_t Addend = getAddend<ELFT>(Rel); + int64_t Addend = getAddend<ELFT>(Rel); if (!RelTy::IsRela) Addend += Target->getImplicitAddend(BufLoc, Type); - SymbolBody &Sym = this->File->getRelocTargetSym(Rel); - if (Target->getRelExpr(Type, Sym) != R_ABS) { - error(this->getLocation(Offset) + ": has non-ABS reloc"); + SymbolBody &Sym = this->getFile<ELFT>()->getRelocTargetSym(Rel); + RelExpr Expr = Target->getRelExpr(Type, Sym, BufLoc); + if (Expr == R_NONE) + continue; + if (Expr != R_ABS) { + error(this->getLocation<ELFT>(Offset) + ": has non-ABS reloc"); return; } - uintX_t AddrLoc = this->OutSec->Addr + Offset; + uint64_t AddrLoc = this->OutSec->Addr + Offset; uint64_t SymVA = 0; - if (!Sym.isTls() || Out<ELFT>::TlsPhdr) - SymVA = SignExtend64<sizeof(uintX_t) * 8>( + if (!Sym.isTls() || Out::TlsPhdr) + SymVA = SignExtend64<sizeof(typename ELFT::uint) * 8>( getRelocTargetVA<ELFT>(Type, Addend, AddrLoc, Sym, R_ABS)); Target->relocateOne(BufLoc, Type, SymVA); } } +template <class ELFT> elf::ObjectFile<ELFT> *InputSectionBase::getFile() const { + return cast_or_null<elf::ObjectFile<ELFT>>(File); +} + template <class ELFT> -void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) { +void InputSectionBase::relocate(uint8_t *Buf, uint8_t *BufEnd) { // scanReloc function in Writer.cpp constructs Relocations // vector only for SHF_ALLOC'ed sections. For other sections, // we handle relocations directly here. 
- auto *IS = dyn_cast<InputSection<ELFT>>(this); + auto *IS = dyn_cast<InputSection>(this); if (IS && !(IS->Flags & SHF_ALLOC)) { if (IS->AreRelocsRela) - IS->relocateNonAlloc(Buf, IS->relas()); + IS->relocateNonAlloc<ELFT>(Buf, IS->template relas<ELFT>()); else - IS->relocateNonAlloc(Buf, IS->rels()); + IS->relocateNonAlloc<ELFT>(Buf, IS->template rels<ELFT>()); return; } - const unsigned Bits = sizeof(uintX_t) * 8; + const unsigned Bits = sizeof(typename ELFT::uint) * 8; for (const Relocation &Rel : Relocations) { - uintX_t Offset = getOffset(Rel.Offset); + uint64_t Offset = getOffset(Rel.Offset); uint8_t *BufLoc = Buf + Offset; uint32_t Type = Rel.Type; - uintX_t A = Rel.Addend; - uintX_t AddrLoc = OutSec->Addr + Offset; + uint64_t AddrLoc = getOutputSection()->Addr + Offset; RelExpr Expr = Rel.Expr; uint64_t TargetVA = SignExtend64<Bits>( - getRelocTargetVA<ELFT>(Type, A, AddrLoc, *Rel.Sym, Expr)); + getRelocTargetVA<ELFT>(Type, Rel.Addend, AddrLoc, *Rel.Sym, Expr)); switch (Expr) { case R_RELAX_GOT_PC: @@ -520,67 +639,54 @@ void InputSectionBase<ELFT>::relocate(uint8_t *Buf, uint8_t *BufEnd) { } } -template <class ELFT> void InputSection<ELFT>::writeTo(uint8_t *Buf) { +template <class ELFT> void InputSection::writeTo(uint8_t *Buf) { if (this->Type == SHT_NOBITS) return; - if (auto *S = dyn_cast<SyntheticSection<ELFT>>(this)) { + if (auto *S = dyn_cast<SyntheticSection>(this)) { S->writeTo(Buf + OutSecOff); return; } - // If -r is given, then an InputSection may be a relocation section. + // If -r or --emit-relocs is given, then an InputSection + // may be a relocation section. 
if (this->Type == SHT_RELA) { - copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rela>()); + copyRelocations<ELFT>(Buf + OutSecOff, + this->template getDataAs<typename ELFT::Rela>()); return; } if (this->Type == SHT_REL) { - copyRelocations(Buf + OutSecOff, this->template getDataAs<Elf_Rel>()); + copyRelocations<ELFT>(Buf + OutSecOff, + this->template getDataAs<typename ELFT::Rel>()); return; } - // Copy section contents from source object file to output file. - ArrayRef<uint8_t> Data = this->Data; + // Copy section contents from source object file to output file + // and then apply relocations. memcpy(Buf + OutSecOff, Data.data(), Data.size()); - - // Iterate over all relocation sections that apply to this section. uint8_t *BufEnd = Buf + OutSecOff + Data.size(); - this->relocate(Buf, BufEnd); - - // The section might have a data/code generated by the linker and need - // to be written after the section. Usually these are thunks - small piece - // of code used to jump between "incompatible" functions like PIC and non-PIC - // or if the jump target too far and its address does not fit to the short - // jump istruction. 
- if (!Thunks.empty()) { - Buf += OutSecOff + getThunkOff(); - for (const Thunk<ELFT> *T : Thunks) { - T->writeTo(Buf); - Buf += T->size(); - } - } + this->relocate<ELFT>(Buf, BufEnd); } -template <class ELFT> -void InputSection<ELFT>::replace(InputSection<ELFT> *Other) { +void InputSection::replace(InputSection *Other) { this->Alignment = std::max(this->Alignment, Other->Alignment); Other->Repl = this->Repl; Other->Live = false; } template <class ELFT> -EhInputSection<ELFT>::EhInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header, StringRef Name) - : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::EHFrame) { +EhInputSection::EhInputSection(elf::ObjectFile<ELFT> *F, + const typename ELFT::Shdr *Header, + StringRef Name) + : InputSectionBase(F, Header, Name, InputSectionBase::EHFrame) { // Mark .eh_frame sections as live by default because there are // usually no relocations that point to .eh_frames. Otherwise, // the garbage collector would drop all .eh_frame sections. this->Live = true; } -template <class ELFT> -bool EhInputSection<ELFT>::classof(const InputSectionData *S) { - return S->kind() == InputSectionBase<ELFT>::EHFrame; +bool EhInputSection::classof(const SectionBase *S) { + return S->kind() == InputSectionBase::EHFrame; } // Returns the index of the first relocation that points to a region between @@ -604,24 +710,23 @@ static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels, // .eh_frame is a sequence of CIE or FDE records. // This function splits an input section into records and returns them. -template <class ELFT> void EhInputSection<ELFT>::split() { +template <class ELFT> void EhInputSection::split() { // Early exit if already split. 
if (!this->Pieces.empty()) return; if (this->NumRelocations) { if (this->AreRelocsRela) - split(this->relas()); + split<ELFT>(this->relas<ELFT>()); else - split(this->rels()); + split<ELFT>(this->rels<ELFT>()); return; } - split(makeArrayRef<typename ELFT::Rela>(nullptr, nullptr)); + split<ELFT>(makeArrayRef<typename ELFT::Rela>(nullptr, nullptr)); } -template <class ELFT> -template <class RelTy> -void EhInputSection<ELFT>::split(ArrayRef<RelTy> Rels) { +template <class ELFT, class RelTy> +void EhInputSection::split(ArrayRef<RelTy> Rels) { ArrayRef<uint8_t> Data = this->Data; unsigned RelI = 0; for (size_t Off = 0, End = Data.size(); Off != End;) { @@ -650,9 +755,7 @@ static size_t findNull(ArrayRef<uint8_t> A, size_t EntSize) { // Split SHF_STRINGS section. Such section is a sequence of // null-terminated strings. -template <class ELFT> -void MergeInputSection<ELFT>::splitStrings(ArrayRef<uint8_t> Data, - size_t EntSize) { +void MergeInputSection::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) { size_t Off = 0; bool IsAlloc = this->Flags & SHF_ALLOC; while (!Data.empty()) { @@ -669,9 +772,8 @@ void MergeInputSection<ELFT>::splitStrings(ArrayRef<uint8_t> Data, // Split non-SHF_STRINGS section. Such section is a sequence of // fixed size records. 
-template <class ELFT> -void MergeInputSection<ELFT>::splitNonStrings(ArrayRef<uint8_t> Data, - size_t EntSize) { +void MergeInputSection::splitNonStrings(ArrayRef<uint8_t> Data, + size_t EntSize) { size_t Size = Data.size(); assert((Size % EntSize) == 0); bool IsAlloc = this->Flags & SHF_ALLOC; @@ -682,10 +784,10 @@ void MergeInputSection<ELFT>::splitNonStrings(ArrayRef<uint8_t> Data, } template <class ELFT> -MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F, - const Elf_Shdr *Header, - StringRef Name) - : InputSectionBase<ELFT>(F, Header, Name, InputSectionBase<ELFT>::Merge) {} +MergeInputSection::MergeInputSection(elf::ObjectFile<ELFT> *F, + const typename ELFT::Shdr *Header, + StringRef Name) + : InputSectionBase(F, Header, Name, InputSectionBase::Merge) {} // This function is called after we obtain a complete list of input sections // that need to be linked. This is responsible to split section contents @@ -693,28 +795,26 @@ MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F, // // Note that this function is called from parallel_for_each. This must be // thread-safe (i.e. no memory allocation from the pools). -template <class ELFT> void MergeInputSection<ELFT>::splitIntoPieces() { +void MergeInputSection::splitIntoPieces() { ArrayRef<uint8_t> Data = this->Data; - uintX_t EntSize = this->Entsize; + uint64_t EntSize = this->Entsize; if (this->Flags & SHF_STRINGS) splitStrings(Data, EntSize); else splitNonStrings(Data, EntSize); if (Config->GcSections && (this->Flags & SHF_ALLOC)) - for (uintX_t Off : LiveOffsets) + for (uint64_t Off : LiveOffsets) this->getSectionPiece(Off)->Live = true; } -template <class ELFT> -bool MergeInputSection<ELFT>::classof(const InputSectionData *S) { - return S->kind() == InputSectionBase<ELFT>::Merge; +bool MergeInputSection::classof(const SectionBase *S) { + return S->kind() == InputSectionBase::Merge; } // Do binary search to get a section piece at a given input offset. 
-template <class ELFT> -SectionPiece *MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) { - auto *This = static_cast<const MergeInputSection<ELFT> *>(this); +SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) { + auto *This = static_cast<const MergeInputSection *>(this); return const_cast<SectionPiece *>(This->getSectionPiece(Offset)); } @@ -731,17 +831,15 @@ static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) { return Comp(Value, *First) ? First : First + 1; } -template <class ELFT> -const SectionPiece * -MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { - uintX_t Size = this->Data.size(); +const SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) const { + uint64_t Size = this->Data.size(); if (Offset >= Size) fatal(toString(this) + ": entry is past the end of the section"); // Find the element this offset points to. auto I = fastUpperBound( Pieces.begin(), Pieces.end(), Offset, - [](const uintX_t &A, const SectionPiece &B) { return A < B.InputOff; }); + [](const uint64_t &A, const SectionPiece &B) { return A < B.InputOff; }); --I; return &*I; } @@ -749,8 +847,7 @@ MergeInputSection<ELFT>::getSectionPiece(uintX_t Offset) const { // Returns the offset in an output section for a given input offset. // Because contents of a mergeable section is not contiguous in output, // it is not just an addition to a base output offset. -template <class ELFT> -typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const { +uint64_t MergeInputSection::getOffset(uint64_t Offset) const { // Initialize OffsetMap lazily. 
std::call_once(InitOffsetMap, [&] { OffsetMap.reserve(Pieces.size()); @@ -772,31 +869,63 @@ typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) const { if (!Piece.Live) return 0; - uintX_t Addend = Offset - Piece.InputOff; + uint64_t Addend = Offset - Piece.InputOff; return Piece.OutputOff + Addend; } -template class elf::InputSectionBase<ELF32LE>; -template class elf::InputSectionBase<ELF32BE>; -template class elf::InputSectionBase<ELF64LE>; -template class elf::InputSectionBase<ELF64BE>; - -template class elf::InputSection<ELF32LE>; -template class elf::InputSection<ELF32BE>; -template class elf::InputSection<ELF64LE>; -template class elf::InputSection<ELF64BE>; - -template class elf::EhInputSection<ELF32LE>; -template class elf::EhInputSection<ELF32BE>; -template class elf::EhInputSection<ELF64LE>; -template class elf::EhInputSection<ELF64BE>; - -template class elf::MergeInputSection<ELF32LE>; -template class elf::MergeInputSection<ELF32BE>; -template class elf::MergeInputSection<ELF64LE>; -template class elf::MergeInputSection<ELF64BE>; - -template std::string lld::toString(const InputSectionBase<ELF32LE> *); -template std::string lld::toString(const InputSectionBase<ELF32BE> *); -template std::string lld::toString(const InputSectionBase<ELF64LE> *); -template std::string lld::toString(const InputSectionBase<ELF64BE> *); +template InputSection::InputSection(elf::ObjectFile<ELF32LE> *, + const ELF32LE::Shdr *, StringRef); +template InputSection::InputSection(elf::ObjectFile<ELF32BE> *, + const ELF32BE::Shdr *, StringRef); +template InputSection::InputSection(elf::ObjectFile<ELF64LE> *, + const ELF64LE::Shdr *, StringRef); +template InputSection::InputSection(elf::ObjectFile<ELF64BE> *, + const ELF64BE::Shdr *, StringRef); + +template std::string InputSectionBase::getLocation<ELF32LE>(uint64_t); +template std::string InputSectionBase::getLocation<ELF32BE>(uint64_t); +template std::string InputSectionBase::getLocation<ELF64LE>(uint64_t); 
+template std::string InputSectionBase::getLocation<ELF64BE>(uint64_t); + +template std::string InputSectionBase::getSrcMsg<ELF32LE>(uint64_t); +template std::string InputSectionBase::getSrcMsg<ELF32BE>(uint64_t); +template std::string InputSectionBase::getSrcMsg<ELF64LE>(uint64_t); +template std::string InputSectionBase::getSrcMsg<ELF64BE>(uint64_t); + +template std::string InputSectionBase::getObjMsg<ELF32LE>(uint64_t); +template std::string InputSectionBase::getObjMsg<ELF32BE>(uint64_t); +template std::string InputSectionBase::getObjMsg<ELF64LE>(uint64_t); +template std::string InputSectionBase::getObjMsg<ELF64BE>(uint64_t); + +template void InputSection::writeTo<ELF32LE>(uint8_t *); +template void InputSection::writeTo<ELF32BE>(uint8_t *); +template void InputSection::writeTo<ELF64LE>(uint8_t *); +template void InputSection::writeTo<ELF64BE>(uint8_t *); + +template elf::ObjectFile<ELF32LE> *InputSectionBase::getFile<ELF32LE>() const; +template elf::ObjectFile<ELF32BE> *InputSectionBase::getFile<ELF32BE>() const; +template elf::ObjectFile<ELF64LE> *InputSectionBase::getFile<ELF64LE>() const; +template elf::ObjectFile<ELF64BE> *InputSectionBase::getFile<ELF64BE>() const; + +template MergeInputSection::MergeInputSection(elf::ObjectFile<ELF32LE> *, + const ELF32LE::Shdr *, StringRef); +template MergeInputSection::MergeInputSection(elf::ObjectFile<ELF32BE> *, + const ELF32BE::Shdr *, StringRef); +template MergeInputSection::MergeInputSection(elf::ObjectFile<ELF64LE> *, + const ELF64LE::Shdr *, StringRef); +template MergeInputSection::MergeInputSection(elf::ObjectFile<ELF64BE> *, + const ELF64BE::Shdr *, StringRef); + +template EhInputSection::EhInputSection(elf::ObjectFile<ELF32LE> *, + const ELF32LE::Shdr *, StringRef); +template EhInputSection::EhInputSection(elf::ObjectFile<ELF32BE> *, + const ELF32BE::Shdr *, StringRef); +template EhInputSection::EhInputSection(elf::ObjectFile<ELF64LE> *, + const ELF64LE::Shdr *, StringRef); +template 
EhInputSection::EhInputSection(elf::ObjectFile<ELF64BE> *, + const ELF64BE::Shdr *, StringRef); + +template void EhInputSection::split<ELF32LE>(); +template void EhInputSection::split<ELF32BE>(); +template void EhInputSection::split<ELF64LE>(); +template void EhInputSection::split<ELF64BE>(); diff --git a/ELF/InputSection.h b/ELF/InputSection.h index 3f3a055dcc33..57458588b690 100644 --- a/ELF/InputSection.h +++ b/ELF/InputSection.h @@ -27,97 +27,115 @@ class DefinedCommon; class SymbolBody; struct SectionPiece; -template <class ELFT> class DefinedRegular; +class DefinedRegular; +class SyntheticSection; +template <class ELFT> class EhFrameSection; +class MergeSyntheticSection; template <class ELFT> class ObjectFile; -template <class ELFT> class OutputSection; -class OutputSectionBase; - -// We need non-template input section class to store symbol layout -// in linker script parser structures, where we do not have ELFT -// template parameter. For each scripted output section symbol we -// store pointer to preceding InputSectionData object or nullptr, -// if symbol should be placed at the very beginning of the output -// section -class InputSectionData { +class OutputSection; + +// This is the base class of all sections that lld handles. Some are sections in +// input files, some are sections in the produced output file and some exist +// just as a convenience for implementing special ways of combining some +// sections. +class SectionBase { public: - enum Kind { Regular, EHFrame, Merge, Synthetic, }; + enum Kind { Regular, EHFrame, Merge, Synthetic, Output }; - // The garbage collector sets sections' Live bits. - // If GC is disabled, all sections are considered live by default. 
- InputSectionData(Kind SectionKind, StringRef Name, ArrayRef<uint8_t> Data, - bool Live) - : SectionKind(SectionKind), Live(Live), Assigned(false), Name(Name), - Data(Data) {} + Kind kind() const { return (Kind)SectionKind; } + + StringRef Name; -private: unsigned SectionKind : 3; -public: - Kind kind() const { return (Kind)SectionKind; } + // The next two bit fields are only used by InputSectionBase, but we + // put them here so the struct packs better. + + // The garbage collector sets sections' Live bits. + // If GC is disabled, all sections are considered live by default. + unsigned Live : 1; // for garbage collection + unsigned Assigned : 1; // for linker script - unsigned Live : 1; // for garbage collection - unsigned Assigned : 1; // for linker script uint32_t Alignment; - StringRef Name; - ArrayRef<uint8_t> Data; - template <typename T> llvm::ArrayRef<T> getDataAs() const { - size_t S = Data.size(); - assert(S % sizeof(T) == 0); - return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T)); + // These corresponds to the fields in Elf_Shdr. + uint64_t Flags; + uint64_t Entsize; + uint32_t Type; + uint32_t Link; + uint32_t Info; + + OutputSection *getOutputSection(); + const OutputSection *getOutputSection() const { + return const_cast<SectionBase *>(this)->getOutputSection(); } - std::vector<Relocation> Relocations; + // Translate an offset in the input section to an offset in the output + // section. + uint64_t getOffset(uint64_t Offset) const; + + uint64_t getOffset(const DefinedRegular &Sym) const; + +protected: + SectionBase(Kind SectionKind, StringRef Name, uint64_t Flags, + uint64_t Entsize, uint64_t Alignment, uint32_t Type, + uint32_t Info, uint32_t Link) + : Name(Name), SectionKind(SectionKind), Alignment(Alignment), + Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), Info(Info) { + Live = false; + Assigned = false; + } }; // This corresponds to a section of an input file. 
-template <class ELFT> class InputSectionBase : public InputSectionData { -protected: - typedef typename ELFT::Chdr Elf_Chdr; - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; +class InputSectionBase : public SectionBase { +public: + static bool classof(const SectionBase *S); // The file this section is from. - ObjectFile<ELFT> *File; + InputFile *File; -public: - // These corresponds to the fields in Elf_Shdr. - uintX_t Flags; - uintX_t Offset = 0; - uintX_t Entsize; - uint32_t Type; - uint32_t Link; - uint32_t Info; + ArrayRef<uint8_t> Data; + uint64_t getOffsetInFile() const; + + static InputSectionBase Discarded; InputSectionBase() - : InputSectionData(Regular, "", ArrayRef<uint8_t>(), false), Repl(this) { + : SectionBase(Regular, "", /*Flags*/ 0, /*Entsize*/ 0, /*Alignment*/ 0, + /*Type*/ 0, + /*Info*/ 0, /*Link*/ 0), + Repl(this) { + Live = false; + Assigned = false; NumRelocations = 0; AreRelocsRela = false; } - InputSectionBase(ObjectFile<ELFT> *File, const Elf_Shdr *Header, + template <class ELFT> + InputSectionBase(ObjectFile<ELFT> *File, const typename ELFT::Shdr *Header, StringRef Name, Kind SectionKind); - InputSectionBase(ObjectFile<ELFT> *File, uintX_t Flags, uint32_t Type, - uintX_t Entsize, uint32_t Link, uint32_t Info, - uintX_t Addralign, ArrayRef<uint8_t> Data, StringRef Name, + + InputSectionBase(InputFile *File, uint64_t Flags, uint32_t Type, + uint64_t Entsize, uint32_t Link, uint32_t Info, + uint32_t Alignment, ArrayRef<uint8_t> Data, StringRef Name, Kind SectionKind); - OutputSectionBase *OutSec = nullptr; + OutputSection *OutSec = nullptr; // Relocations that refer to this section. 
- const Elf_Rel *FirstRelocation = nullptr; + const void *FirstRelocation = nullptr; unsigned NumRelocations : 31; unsigned AreRelocsRela : 1; - ArrayRef<Elf_Rel> rels() const { + template <class ELFT> ArrayRef<typename ELFT::Rel> rels() const { assert(!AreRelocsRela); - return llvm::makeArrayRef(FirstRelocation, NumRelocations); + return llvm::makeArrayRef( + static_cast<const typename ELFT::Rel *>(FirstRelocation), + NumRelocations); } - ArrayRef<Elf_Rela> relas() const { + template <class ELFT> ArrayRef<typename ELFT::Rela> relas() const { assert(AreRelocsRela); - return llvm::makeArrayRef(static_cast<const Elf_Rela *>(FirstRelocation), - NumRelocations); + return llvm::makeArrayRef( + static_cast<const typename ELFT::Rela *>(FirstRelocation), + NumRelocations); } // This pointer points to the "real" instance of this instance. @@ -125,25 +143,38 @@ public: // Repl pointer of one section points to another section. So, // if you need to get a pointer to this instance, do not use // this but instead this->Repl. - InputSectionBase<ELFT> *Repl; + InputSectionBase *Repl; + + // InputSections that are dependent on us (reverse dependency for GC) + llvm::TinyPtrVector<InputSectionBase *> DependentSections; // Returns the size of this section (even if this is a common or BSS.) size_t getSize() const; - ObjectFile<ELFT> *getFile() const { return File; } - llvm::object::ELFFile<ELFT> getObj() const { return File->getObj(); } - uintX_t getOffset(const DefinedRegular<ELFT> &Sym) const; + template <class ELFT> ObjectFile<ELFT> *getFile() const; + + template <class ELFT> llvm::object::ELFFile<ELFT> getObj() const { + return getFile<ELFT>()->getObj(); + } + InputSectionBase *getLinkOrderDep() const; - // Translate an offset in the input section to an offset in the output - // section. - uintX_t getOffset(uintX_t Offset) const; void uncompress(); // Returns a source location string. Used to construct an error message. 
- std::string getLocation(uintX_t Offset); + template <class ELFT> std::string getLocation(uint64_t Offset); + template <class ELFT> std::string getSrcMsg(uint64_t Offset); + template <class ELFT> std::string getObjMsg(uint64_t Offset); + + template <class ELFT> void relocate(uint8_t *Buf, uint8_t *BufEnd); + + std::vector<Relocation> Relocations; - void relocate(uint8_t *Buf, uint8_t *BufEnd); + template <typename T> llvm::ArrayRef<T> getDataAs() const { + size_t S = Data.size(); + assert(S % sizeof(T) == 0); + return llvm::makeArrayRef<T>((const T *)Data.data(), S / sizeof(T)); + } }; // SectionPiece represents a piece of splittable section contents. @@ -162,26 +193,23 @@ static_assert(sizeof(SectionPiece) == 2 * sizeof(size_t), "SectionPiece is too big"); // This corresponds to a SHF_MERGE section of an input file. -template <class ELFT> class MergeInputSection : public InputSectionBase<ELFT> { - typedef typename ELFT::uint uintX_t; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::Shdr Elf_Shdr; - +class MergeInputSection : public InputSectionBase { public: - MergeInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, + template <class ELFT> + MergeInputSection(ObjectFile<ELFT> *F, const typename ELFT::Shdr *Header, StringRef Name); - static bool classof(const InputSectionData *S); + static bool classof(const SectionBase *S); void splitIntoPieces(); // Mark the piece at a given offset live. Used by GC. - void markLiveAt(uintX_t Offset) { + void markLiveAt(uint64_t Offset) { assert(this->Flags & llvm::ELF::SHF_ALLOC); LiveOffsets.insert(Offset); } // Translate an offset in the input section to an offset // in the output section. - uintX_t getOffset(uintX_t Offset) const; + uint64_t getOffset(uint64_t Offset) const; // Splittable sections are handled as a sequence of data // rather than a single large blob of data. @@ -203,8 +231,13 @@ public: } // Returns the SectionPiece at a given input section offset. 
- SectionPiece *getSectionPiece(uintX_t Offset); - const SectionPiece *getSectionPiece(uintX_t Offset) const; + SectionPiece *getSectionPiece(uint64_t Offset); + const SectionPiece *getSectionPiece(uint64_t Offset) const; + + // MergeInputSections are aggregated to a synthetic input sections, + // and then added to an OutputSection. This pointer points to a + // synthetic MergeSyntheticSection which this section belongs to. + MergeSyntheticSection *MergeSec = nullptr; private: void splitStrings(ArrayRef<uint8_t> A, size_t Size); @@ -212,18 +245,18 @@ private: std::vector<uint32_t> Hashes; - mutable llvm::DenseMap<uintX_t, uintX_t> OffsetMap; + mutable llvm::DenseMap<uint64_t, uint64_t> OffsetMap; mutable std::once_flag InitOffsetMap; - llvm::DenseSet<uintX_t> LiveOffsets; + llvm::DenseSet<uint64_t> LiveOffsets; }; struct EhSectionPiece : public SectionPiece { - EhSectionPiece(size_t Off, InputSectionData *ID, uint32_t Size, + EhSectionPiece(size_t Off, InputSectionBase *ID, uint32_t Size, unsigned FirstRelocation) : SectionPiece(Off, false), ID(ID), Size(Size), FirstRelocation(FirstRelocation) {} - InputSectionData *ID; + InputSectionBase *ID; uint32_t Size; uint32_t size() const { return Size; } @@ -232,85 +265,65 @@ struct EhSectionPiece : public SectionPiece { }; // This corresponds to a .eh_frame section of an input file. 
-template <class ELFT> class EhInputSection : public InputSectionBase<ELFT> { +class EhInputSection : public InputSectionBase { public: - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::uint uintX_t; - EhInputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name); - static bool classof(const InputSectionData *S); - void split(); - template <class RelTy> void split(ArrayRef<RelTy> Rels); + template <class ELFT> + EhInputSection(ObjectFile<ELFT> *F, const typename ELFT::Shdr *Header, + StringRef Name); + static bool classof(const SectionBase *S); + template <class ELFT> void split(); + template <class ELFT, class RelTy> void split(ArrayRef<RelTy> Rels); // Splittable sections are handled as a sequence of data // rather than a single large blob of data. std::vector<EhSectionPiece> Pieces; + SyntheticSection *EHSec = nullptr; }; -// This corresponds to a non SHF_MERGE section of an input file. -template <class ELFT> class InputSection : public InputSectionBase<ELFT> { - typedef InputSectionBase<ELFT> Base; - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; - typedef InputSectionData::Kind Kind; - +// This is a section that is added directly to an output section +// instead of needing special combination via a synthetic section. This +// includes all input sections with the exceptions of SHF_MERGE and +// .eh_frame. It also includes the synthetic sections themselves. 
+class InputSection : public InputSectionBase { public: - InputSection(); - InputSection(uintX_t Flags, uint32_t Type, uintX_t Addralign, - ArrayRef<uint8_t> Data, StringRef Name, - Kind K = InputSectionData::Regular); - InputSection(ObjectFile<ELFT> *F, const Elf_Shdr *Header, StringRef Name); - - static InputSection<ELFT> Discarded; + InputSection(uint64_t Flags, uint32_t Type, uint32_t Alignment, + ArrayRef<uint8_t> Data, StringRef Name, Kind K = Regular); + template <class ELFT> + InputSection(ObjectFile<ELFT> *F, const typename ELFT::Shdr *Header, + StringRef Name); // Write this section to a mmap'ed file, assuming Buf is pointing to // beginning of the output section. - void writeTo(uint8_t *Buf); + template <class ELFT> void writeTo(uint8_t *Buf); // The offset from beginning of the output sections this section was assigned // to. The writer sets a value. uint64_t OutSecOff = 0; - // InputSection that is dependent on us (reverse dependency for GC) - InputSectionBase<ELFT> *DependentSection = nullptr; - - static bool classof(const InputSectionData *S); - - InputSectionBase<ELFT> *getRelocatedSection(); + static bool classof(const SectionBase *S); - // Register thunk related to the symbol. When the section is written - // to a mmap'ed file, target is requested to write an actual thunk code. - // Now thunks is supported for MIPS and ARM target only. - void addThunk(const Thunk<ELFT> *T); + InputSectionBase *getRelocatedSection(); - // The offset of synthetic thunk code from beginning of this section. - uint64_t getThunkOff() const; - - // Size of chunk with thunks code. - uint64_t getThunksSize() const; - - template <class RelTy> + template <class ELFT, class RelTy> void relocateNonAlloc(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); // Used by ICF. uint32_t Class[2] = {0, 0}; // Called by ICF to merge two input sections. 
- void replace(InputSection<ELFT> *Other); + void replace(InputSection *Other); private: - template <class RelTy> + template <class ELFT, class RelTy> void copyRelocations(uint8_t *Buf, llvm::ArrayRef<RelTy> Rels); - - llvm::TinyPtrVector<const Thunk<ELFT> *> Thunks; }; -template <class ELFT> InputSection<ELFT> InputSection<ELFT>::Discarded; +// The list of all input sections. +extern std::vector<InputSectionBase *> InputSections; + } // namespace elf -template <class ELFT> std::string toString(const elf::InputSectionBase<ELFT> *); +std::string toString(const elf::InputSectionBase *); } // namespace lld #endif diff --git a/ELF/LTO.cpp b/ELF/LTO.cpp index b342b6195f1d..dd435173101a 100644 --- a/ELF/LTO.cpp +++ b/ELF/LTO.cpp @@ -12,12 +12,13 @@ #include "Error.h" #include "InputFiles.h" #include "Symbols.h" +#include "lld/Core/TargetOptionsCommandFlags.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/CommandFlags.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/LTO/Caching.h" #include "llvm/LTO/Config.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/SymbolicFile.h" @@ -46,7 +47,7 @@ static void saveBuffer(StringRef Buffer, const Twine &Path) { std::error_code EC; raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None); if (EC) - error(EC, "cannot create " + Path); + error("cannot create " + Path + ": " + EC.message()); OS << Buffer; } @@ -73,6 +74,7 @@ static std::unique_ptr<lto::LTO> createLTO() { Conf.Options.RelaxELFRelocations = true; Conf.RelocModel = Config->Pic ? 
Reloc::PIC_ : Reloc::Static; + Conf.CodeModel = GetCodeModelFromCMModel(); Conf.DisableVerify = Config->DisableVerify; Conf.DiagHandler = diagnosticHandler; Conf.OptLevel = Config->LTOO; @@ -81,6 +83,10 @@ static std::unique_ptr<lto::LTO> createLTO() { Conf.OptPipeline = Config->LTONewPmPasses; Conf.AAPipeline = Config->LTOAAPipeline; + // Set up optimization remarks if we've been asked to. + Conf.RemarksFilename = Config->OptRemarksFilename; + Conf.RemarksWithHotness = Config->OptRemarksWithHotness; + if (Config->SaveTemps) checkError(Conf.addSaveTemps(std::string(Config->OutputFile) + ".", /*UseInputModulePath*/ true)); @@ -96,12 +102,12 @@ BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} BitcodeCompiler::~BitcodeCompiler() = default; -template <class ELFT> static void undefine(Symbol *S) { - replaceBody<Undefined<ELFT>>(S, S->body()->getName(), /*IsLocal=*/false, - STV_DEFAULT, S->body()->Type, nullptr); +static void undefine(Symbol *S) { + replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false, + STV_DEFAULT, S->body()->Type, nullptr); } -template <class ELFT> void BitcodeCompiler::add(BitcodeFile &F) { +void BitcodeCompiler::add(BitcodeFile &F) { lto::InputFile &Obj = *F.Obj; unsigned SymNum = 0; std::vector<Symbol *> Syms = F.getSymbols(); @@ -119,14 +125,12 @@ template <class ELFT> void BitcodeCompiler::add(BitcodeFile &F) { // flags an undefined in IR with a definition in ASM as prevailing. // Once IRObjectFile is fixed to report only one symbol this hack can // be removed. 
- R.Prevailing = - !(ObjSym.getFlags() & object::BasicSymbolRef::SF_Undefined) && - B->File == &F; + R.Prevailing = !ObjSym.isUndefined() && B->File == &F; R.VisibleToRegularObj = Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym()); if (R.Prevailing) - undefine<ELFT>(Sym); + undefine(Sym); } checkError(LTOObj->add(std::move(F.Obj), Resols)); } @@ -137,17 +141,34 @@ std::vector<InputFile *> BitcodeCompiler::compile() { std::vector<InputFile *> Ret; unsigned MaxTasks = LTOObj->getMaxTasks(); Buff.resize(MaxTasks); - - checkError(LTOObj->run([&](size_t Task) { - return llvm::make_unique<lto::NativeObjectStream>( - llvm::make_unique<raw_svector_ostream>(Buff[Task])); - })); + Files.resize(MaxTasks); + + // The --thinlto-cache-dir option specifies the path to a directory in which + // to cache native object files for ThinLTO incremental builds. If a path was + // specified, configure LTO to use it as the cache directory. + lto::NativeObjectCache Cache; + if (!Config->ThinLTOCacheDir.empty()) + Cache = check( + lto::localCache(Config->ThinLTOCacheDir, + [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) { + Files[Task] = std::move(MB); + })); + + checkError(LTOObj->run( + [&](size_t Task) { + return llvm::make_unique<lto::NativeObjectStream>( + llvm::make_unique<raw_svector_ostream>(Buff[Task])); + }, + Cache)); + + if (!Config->ThinLTOCacheDir.empty()) + pruneCache(Config->ThinLTOCacheDir, Config->ThinLTOCachePolicy); for (unsigned I = 0; I != MaxTasks; ++I) { if (Buff[I].empty()) continue; if (Config->SaveTemps) { - if (MaxTasks == 1) + if (I == 0) saveBuffer(Buff[I], Config->OutputFile + ".lto.o"); else saveBuffer(Buff[I], Config->OutputFile + Twine(I) + ".lto.o"); @@ -155,10 +176,10 @@ std::vector<InputFile *> BitcodeCompiler::compile() { InputFile *Obj = createObjectFile(MemoryBufferRef(Buff[I], "lto.tmp")); Ret.push_back(Obj); } + + for (std::unique_ptr<MemoryBuffer> &File : Files) + if (File) + Ret.push_back(createObjectFile(*File)); + return Ret; 
} - -template void BitcodeCompiler::template add<ELF32LE>(BitcodeFile &); -template void BitcodeCompiler::template add<ELF32BE>(BitcodeFile &); -template void BitcodeCompiler::template add<ELF64LE>(BitcodeFile &); -template void BitcodeCompiler::template add<ELF64BE>(BitcodeFile &); diff --git a/ELF/LTO.h b/ELF/LTO.h index 3cb763650e1c..28afa0e83add 100644 --- a/ELF/LTO.h +++ b/ELF/LTO.h @@ -43,12 +43,13 @@ public: BitcodeCompiler(); ~BitcodeCompiler(); - template <class ELFT> void add(BitcodeFile &F); + void add(BitcodeFile &F); std::vector<InputFile *> compile(); private: std::unique_ptr<llvm::lto::LTO> LTOObj; std::vector<SmallString<0>> Buff; + std::vector<std::unique_ptr<MemoryBuffer>> Files; }; } } diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp index 3cc235386b88..ab2ca22e9e17 100644 --- a/ELF/LinkerScript.cpp +++ b/ELF/LinkerScript.cpp @@ -13,27 +13,21 @@ #include "LinkerScript.h" #include "Config.h" -#include "Driver.h" #include "InputSection.h" #include "Memory.h" #include "OutputSections.h" -#include "ScriptParser.h" #include "Strings.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" -#include "Target.h" #include "Writer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include <algorithm> #include <cassert> @@ -41,9 +35,7 @@ #include <cstdint> #include <iterator> #include <limits> -#include <memory> #include <string> -#include <tuple> #include <vector> using namespace llvm; @@ -53,77 +45,145 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -LinkerScriptBase *elf::ScriptBase; -ScriptConfiguration *elf::ScriptConfig; +LinkerScript *elf::Script; + 
+uint64_t ExprValue::getValue() const { + if (Sec) + return Sec->getOffset(Val) + Sec->getOutputSection()->Addr; + return Val; +} + +uint64_t ExprValue::getSecAddr() const { + if (Sec) + return Sec->getOffset(0) + Sec->getOutputSection()->Addr; + return 0; +} template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { + Symbol *Sym; uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; - Symbol *Sym = Symtab<ELFT>::X->addUndefined( - Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, - /*Type*/ 0, - /*CanOmitFromDynSym*/ false, /*File*/ nullptr); - - replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false, - Visibility, STT_NOTYPE, 0, 0, nullptr, - nullptr); + std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert( + Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, + /*File*/ nullptr); + Sym->Binding = STB_GLOBAL; + ExprValue Value = Cmd->Expression(); + SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; + + // We want to set symbol values early if we can. This allows us to use symbols + // as variables in linker scripts. Doing so allows us to write expressions + // like this: `alignment = 16; . = ALIGN(., alignment)` + uint64_t SymValue = Value.isAbsolute() ? Value.getValue() : 0; + replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility, + STT_NOTYPE, SymValue, 0, Sec, nullptr); return Sym->body(); } -template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { - uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; - const OutputSectionBase *Sec = - ScriptConfig->HasSections ? 
nullptr : Cmd->Expression.Section(); - Symbol *Sym = Symtab<ELFT>::X->addUndefined( - Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, - /*Type*/ 0, - /*CanOmitFromDynSym*/ false, /*File*/ nullptr); - - replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); - return Sym->body(); +OutputSection *LinkerScript::getOutputSection(const Twine &Loc, + StringRef Name) { + for (OutputSection *Sec : *OutputSections) + if (Sec->Name == Name) + return Sec; + + static OutputSection Dummy("", 0, 0); + if (ErrorOnMissingSection) + error(Loc + ": undefined section " + Name); + return &Dummy; } -static bool isUnderSysroot(StringRef Path) { - if (Config->Sysroot == "") - return false; - for (; !Path.empty(); Path = sys::path::parent_path(Path)) - if (sys::fs::equivalent(Config->Sysroot, Path)) - return true; - return false; +// This function is essentially the same as getOutputSection(Name)->Size, +// but it won't print out an error message if a given section is not found. +// +// Linker script does not create an output section if its content is empty. +// We want to allow SIZEOF(.foo) where .foo is a section which happened to +// be empty. That is why this function is different from getOutputSection(). +uint64_t LinkerScript::getOutputSectionSize(StringRef Name) { + for (OutputSection *Sec : *OutputSections) + if (Sec->Name == Name) + return Sec->Size; + return 0; +} + +void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) { + uint64_t Val = E().getValue(); + if (Val < Dot) { + if (InSec) + error(Loc + ": unable to move location counter backward for: " + + CurOutSec->Name); + else + error(Loc + ": unable to move location counter backward"); + } + Dot = Val; + // Update to location counter means update to section size. + if (InSec) + CurOutSec->Size = Dot - CurOutSec->Addr; } -template <class ELFT> static void assignSymbol(SymbolAssignment *Cmd) { - // If there are sections, then let the value be assigned later in - // `assignAddresses`. 
- if (ScriptConfig->HasSections) +// Sets value of a symbol. Two kinds of symbols are processed: synthetic +// symbols, whose value is an offset from beginning of section and regular +// symbols whose value is absolute. +void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) { + if (Cmd->Name == ".") { + setDot(Cmd->Expression, Cmd->Location, InSec); + return; + } + + if (!Cmd->Sym) return; - uint64_t Value = Cmd->Expression(0); - if (Cmd->Expression.IsAbsolute()) { - cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Value; + auto *Sym = cast<DefinedRegular>(Cmd->Sym); + ExprValue V = Cmd->Expression(); + if (V.isAbsolute()) { + Sym->Value = V.getValue(); } else { - const OutputSectionBase *Sec = Cmd->Expression.Section(); - if (Sec) - cast<DefinedSynthetic>(Cmd->Sym)->Value = Value - Sec->Addr; + Sym->Section = V.Sec; + if (Sym->Section->Flags & SHF_ALLOC) + Sym->Value = V.Val; + else + Sym->Value = V.getValue(); + } +} + +static SymbolBody *findSymbol(StringRef S) { + switch (Config->EKind) { + case ELF32LEKind: + return Symtab<ELF32LE>::X->find(S); + case ELF32BEKind: + return Symtab<ELF32BE>::X->find(S); + case ELF64LEKind: + return Symtab<ELF64LE>::X->find(S); + case ELF64BEKind: + return Symtab<ELF64BE>::X->find(S); + default: + llvm_unreachable("unknown Config->EKind"); } } -template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { +static SymbolBody *addRegularSymbol(SymbolAssignment *Cmd) { + switch (Config->EKind) { + case ELF32LEKind: + return addRegular<ELF32LE>(Cmd); + case ELF32BEKind: + return addRegular<ELF32BE>(Cmd); + case ELF64LEKind: + return addRegular<ELF64LE>(Cmd); + case ELF64BEKind: + return addRegular<ELF64BE>(Cmd); + default: + llvm_unreachable("unknown Config->EKind"); + } +} + +void LinkerScript::addSymbol(SymbolAssignment *Cmd) { if (Cmd->Name == ".") return; // If a symbol was in PROVIDE(), we need to define it only when // it is a referenced undefined symbol. 
- SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); + SymbolBody *B = findSymbol(Cmd->Name); if (Cmd->Provide && (!B || B->isDefined())) return; - // Otherwise, create a new symbol if one does not exist or an - // undefined one does exist. - if (Cmd->Expression.IsAbsolute()) - Cmd->Sym = addRegular<ELFT>(Cmd); - else - Cmd->Sym = addSynthetic<ELFT>(Cmd); - assignSymbol<ELFT>(Cmd); + Cmd->Sym = addRegularSymbol(Cmd); } bool SymbolAssignment::classof(const BaseCommand *C) { @@ -146,17 +206,13 @@ bool BytesDataCommand::classof(const BaseCommand *C) { return C->Kind == BytesDataKind; } -template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; -template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; - -template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { - if (S->getFile()) - return sys::path::filename(S->getFile()->getName()); +static StringRef basename(InputSectionBase *S) { + if (S->File) + return sys::path::filename(S->File->getName()); return ""; } -template <class ELFT> -bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { +bool LinkerScript::shouldKeep(InputSectionBase *S) { for (InputSectionDescription *ID : Opt.KeptSections) if (ID->FilePat.match(basename(S))) for (SectionPattern &P : ID->SectionPatterns) @@ -165,73 +221,77 @@ bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { return false; } -static bool comparePriority(InputSectionData *A, InputSectionData *B) { - return getPriority(A->Name) < getPriority(B->Name); -} - -static bool compareName(InputSectionData *A, InputSectionData *B) { - return A->Name < B->Name; -} - -static bool compareAlignment(InputSectionData *A, InputSectionData *B) { - // ">" is not a mistake. Larger alignments are placed before smaller - // alignments in order to reduce the amount of padding necessary. - // This is compatible with GNU. 
- return A->Alignment > B->Alignment; -} - -static std::function<bool(InputSectionData *, InputSectionData *)> +// A helper function for the SORT() command. +static std::function<bool(InputSectionBase *, InputSectionBase *)> getComparator(SortSectionPolicy K) { switch (K) { case SortSectionPolicy::Alignment: - return compareAlignment; + return [](InputSectionBase *A, InputSectionBase *B) { + // ">" is not a mistake. Sections with larger alignments are placed + // before sections with smaller alignments in order to reduce the + // amount of padding necessary. This is compatible with GNU. + return A->Alignment > B->Alignment; + }; case SortSectionPolicy::Name: - return compareName; + return [](InputSectionBase *A, InputSectionBase *B) { + return A->Name < B->Name; + }; case SortSectionPolicy::Priority: - return comparePriority; + return [](InputSectionBase *A, InputSectionBase *B) { + return getPriority(A->Name) < getPriority(B->Name); + }; default: llvm_unreachable("unknown sort policy"); } } -template <class ELFT> -static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, +// A helper function for the SORT() command. 
+static bool matchConstraints(ArrayRef<InputSectionBase *> Sections, ConstraintKind Kind) { if (Kind == ConstraintKind::NoConstraint) return true; - bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { - auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); - return Sec->Flags & SHF_WRITE; + + bool IsRW = llvm::any_of(Sections, [](InputSectionBase *Sec) { + return static_cast<InputSectionBase *>(Sec)->Flags & SHF_WRITE; }); + return (IsRW && Kind == ConstraintKind::ReadWrite) || (!IsRW && Kind == ConstraintKind::ReadOnly); } -static void sortSections(InputSectionData **Begin, InputSectionData **End, +static void sortSections(InputSectionBase **Begin, InputSectionBase **End, SortSectionPolicy K) { if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) std::stable_sort(Begin, End, getComparator(K)); } // Compute and remember which sections the InputSectionDescription matches. -template <class ELFT> -void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { - // Collects all sections that satisfy constraints of I - // and attach them to I. - for (SectionPattern &Pat : I->SectionPatterns) { - size_t SizeBefore = I->Sections.size(); - - for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { - if (!S->Live || S->Assigned) +std::vector<InputSectionBase *> +LinkerScript::computeInputSections(const InputSectionDescription *Cmd) { + std::vector<InputSectionBase *> Ret; + + // Collects all sections that satisfy constraints of Cmd. + for (const SectionPattern &Pat : Cmd->SectionPatterns) { + size_t SizeBefore = Ret.size(); + + for (InputSectionBase *Sec : InputSections) { + if (Sec->Assigned) continue; - StringRef Filename = basename(S); - if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) + // For -emit-relocs we have to ignore entries like + // .rela.dyn : { *(.rela.data) } + // which are common because they are in the default bfd script. 
+ if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA) continue; - if (!Pat.SectionPat.match(S->Name)) + + StringRef Filename = basename(Sec); + if (!Cmd->FilePat.match(Filename) || + Pat.ExcludedFilePat.match(Filename) || + !Pat.SectionPat.match(Sec->Name)) continue; - I->Sections.push_back(S); - S->Assigned = true; + + Ret.push_back(Sec); + Sec->Assigned = true; } // Sort sections as instructed by SORT-family commands and --sort-section @@ -245,8 +305,8 @@ void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { // --sort-section is handled as an inner SORT command. // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. // 4. If no SORT command is given, sort according to --sort-section. - InputSectionData **Begin = I->Sections.data() + SizeBefore; - InputSectionData **End = I->Sections.data() + I->Sections.size(); + InputSectionBase **Begin = Ret.data() + SizeBefore; + InputSectionBase **End = Ret.data() + Ret.size(); if (Pat.SortOuter != SortSectionPolicy::None) { if (Pat.SortInner == SortSectionPolicy::Default) sortSections(Begin, End, Config->SortSection); @@ -255,68 +315,58 @@ void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { sortSections(Begin, End, Pat.SortOuter); } } + return Ret; } -template <class ELFT> -void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { - for (InputSectionBase<ELFT> *S : V) { +void LinkerScript::discard(ArrayRef<InputSectionBase *> V) { + for (InputSectionBase *S : V) { S->Live = false; - reportDiscarded(S); + if (S == InX::ShStrTab) + error("discarding .shstrtab section is not allowed"); + discard(S->DependentSections); } } -template <class ELFT> -std::vector<InputSectionBase<ELFT> *> -LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { - std::vector<InputSectionBase<ELFT> *> Ret; +std::vector<InputSectionBase *> +LinkerScript::createInputSectionList(OutputSectionCommand &OutCmd) { + std::vector<InputSectionBase *> Ret; - for 
(const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { - auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); + for (BaseCommand *Base : OutCmd.Commands) { + auto *Cmd = dyn_cast<InputSectionDescription>(Base); if (!Cmd) continue; - computeInputSections(Cmd); - for (InputSectionData *S : Cmd->Sections) - Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); + + Cmd->Sections = computeInputSections(Cmd); + Ret.insert(Ret.end(), Cmd->Sections.begin(), Cmd->Sections.end()); } return Ret; } -template <class ELFT> -void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, - InputSectionBase<ELFT> *Sec, - StringRef Name) { - OutputSectionBase *OutSec; - bool IsNew; - std::tie(OutSec, IsNew) = Factory.create(Sec, Name); - if (IsNew) - OutputSections->push_back(OutSec); - OutSec->addSection(Sec); -} - -template <class ELFT> -void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { - for (unsigned I = 0; I < Opt.Commands.size(); ++I) { - auto Iter = Opt.Commands.begin() + I; - const std::unique_ptr<BaseCommand> &Base1 = *Iter; +void LinkerScript::processCommands(OutputSectionFactory &Factory) { + // A symbol can be assigned before any section is mentioned in the linker + // script. In an DSO, the symbol values are addresses, so the only important + // section values are: + // * SHN_UNDEF + // * SHN_ABS + // * Any value meaning a regular section. + // To handle that, create a dummy aether section that fills the void before + // the linker scripts switches to another section. It has an index of one + // which will map to whatever the first actual section is. + Aether = make<OutputSection>("", 0, SHF_ALLOC); + Aether->SectionIndex = 1; + CurOutSec = Aether; + Dot = 0; + for (size_t I = 0; I < Opt.Commands.size(); ++I) { // Handle symbol assignments outside of any output section. 
- if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { - addSymbol<ELFT>(Cmd); + if (auto *Cmd = dyn_cast<SymbolAssignment>(Opt.Commands[I])) { + addSymbol(Cmd); continue; } - if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { - // If we don't have SECTIONS then output sections have already been - // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses - // will not be called, so ASSERT should be evaluated now. - if (!Opt.HasSections) - Cmd->Expression(0); - continue; - } - - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { - std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I])) { + std::vector<InputSectionBase *> V = createInputSectionList(*Cmd); // The output section name `/DISCARD/' is special. // Any input section assigned to it is discarded. @@ -332,74 +382,54 @@ void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { // // Because we'll iterate over Commands many more times, the easiest // way to "make it as if it wasn't present" is to just remove it. - if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { - for (InputSectionBase<ELFT> *S : V) + if (!matchConstraints(V, Cmd->Constraint)) { + for (InputSectionBase *S : V) S->Assigned = false; - Opt.Commands.erase(Iter); + Opt.Commands.erase(Opt.Commands.begin() + I); --I; continue; } // A directive may contain symbol definitions like this: // ".foo : { ...; bar = .; }". Handle them. - for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) - if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) - addSymbol<ELFT>(OutCmd); + for (BaseCommand *Base : Cmd->Commands) + if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base)) + addSymbol(OutCmd); // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign // is given, input sections are aligned to that value, whether the // given value is larger or smaller than the original section alignment. 
if (Cmd->SubalignExpr) { - uint32_t Subalign = Cmd->SubalignExpr(0); - for (InputSectionBase<ELFT> *S : V) + uint32_t Subalign = Cmd->SubalignExpr().getValue(); + for (InputSectionBase *S : V) S->Alignment = Subalign; } // Add input sections to an output section. - for (InputSectionBase<ELFT> *S : V) - addSection(Factory, S, Cmd->Name); + for (InputSectionBase *S : V) + Factory.addInputSec(S, Cmd->Name); } } + CurOutSec = nullptr; } // Add sections that didn't match any sections command. -template <class ELFT> -void LinkerScript<ELFT>::addOrphanSections( - OutputSectionFactory<ELFT> &Factory) { - for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) +void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) { + for (InputSectionBase *S : InputSections) if (S->Live && !S->OutSec) - addSection(Factory, S, getOutputSectionName(S->Name)); + Factory.addInputSec(S, getOutputSectionName(S->Name)); } -// Sets value of a section-defined symbol. Two kinds of -// symbols are processed: synthetic symbols, whose value -// is an offset from beginning of section and regular -// symbols whose value is absolute. -template <class ELFT> -static void assignSectionSymbol(SymbolAssignment *Cmd, - typename ELFT::uint Value) { - if (!Cmd->Sym) - return; - - if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { - Body->Section = Cmd->Expression.Section(); - Body->Value = Cmd->Expression(Value) - Body->Section->Addr; - return; - } - auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); - Body->Value = Cmd->Expression(Value); -} - -template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { +static bool isTbss(OutputSection *Sec) { return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; } -template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { +void LinkerScript::output(InputSection *S) { if (!AlreadyOutputIS.insert(S).second) return; - bool IsTbss = isTbss<ELFT>(CurOutSec); + bool IsTbss = isTbss(CurOutSec); - uintX_t Pos = IsTbss ? 
Dot + ThreadBssOffset : Dot; + uint64_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; Pos = alignTo(Pos, S->Alignment); S->OutSecOff = Pos - CurOutSec->Addr; Pos += S->getSize(); @@ -409,134 +439,171 @@ template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } CurOutSec->Size = Pos - CurOutSec->Addr; + // If there is a memory region associated with this input section, then + // place the section in that region and update the region index. + if (CurMemRegion) { + CurMemRegion->Offset += CurOutSec->Size; + uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; + if (CurSize > CurMemRegion->Length) { + uint64_t OverflowAmt = CurSize - CurMemRegion->Length; + error("section '" + CurOutSec->Name + "' will not fit in region '" + + CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + + " bytes"); + } + } + if (IsTbss) ThreadBssOffset = Pos - Dot; else Dot = Pos; } -template <class ELFT> void LinkerScript<ELFT>::flush() { - if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) +void LinkerScript::flush() { + assert(CurOutSec); + if (!AlreadyOutputOS.insert(CurOutSec).second) return; - if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { - for (InputSection<ELFT> *I : OutSec->Sections) - output(I); - } else { - Dot += CurOutSec->Size; - } + for (InputSection *I : CurOutSec->Sections) + output(I); } -template <class ELFT> -void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { +void LinkerScript::switchTo(OutputSection *Sec) { if (CurOutSec == Sec) return; if (AlreadyOutputOS.count(Sec)) return; - flush(); CurOutSec = Sec; - Dot = alignTo(Dot, CurOutSec->Addralign); - CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; + Dot = alignTo(Dot, CurOutSec->Alignment); + CurOutSec->Addr = isTbss(CurOutSec) ? 
Dot + ThreadBssOffset : Dot; // If neither AT nor AT> is specified for an allocatable section, the linker // will set the LMA such that the difference between VMA and LMA for the // section is the same as the preceding output section in the same region // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html - CurOutSec->setLMAOffset(LMAOffset); + if (LMAOffset) + CurOutSec->LMAOffset = LMAOffset(); } -template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { - // This handles the assignments to symbol or to a location counter (.) - if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { - if (AssignCmd->Name == ".") { - // Update to location counter means update to section size. - uintX_t Val = AssignCmd->Expression(Dot); - if (Val < Dot) - error("unable to move location counter backward for: " + - CurOutSec->Name); - Dot = Val; - CurOutSec->Size = Dot - CurOutSec->Addr; - return; - } - assignSectionSymbol<ELFT>(AssignCmd, Dot); +void LinkerScript::process(BaseCommand &Base) { + // This handles the assignments to symbol or to the dot. + if (auto *Cmd = dyn_cast<SymbolAssignment>(&Base)) { + assignSymbol(Cmd, true); return; } // Handle BYTE(), SHORT(), LONG(), or QUAD(). - if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { - DataCmd->Offset = Dot - CurOutSec->Addr; - Dot += DataCmd->Size; + if (auto *Cmd = dyn_cast<BytesDataCommand>(&Base)) { + Cmd->Offset = Dot - CurOutSec->Addr; + Dot += Cmd->Size; CurOutSec->Size = Dot - CurOutSec->Addr; return; } - if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { - AssertCmd->Expression(Dot); + // Handle ASSERT(). + if (auto *Cmd = dyn_cast<AssertCommand>(&Base)) { + Cmd->Expression(); return; } - // It handles single input section description command, - // calculates and assigns the offsets for each section and also + // Handle a single input section description command. + // It calculates and assigns the offsets for each section and also // updates the output section size. 
- auto &ICmd = cast<InputSectionDescription>(Base); - for (InputSectionData *ID : ICmd.Sections) { + auto &Cmd = cast<InputSectionDescription>(Base); + for (InputSectionBase *Sec : Cmd.Sections) { // We tentatively added all synthetic sections at the beginning and removed // empty ones afterwards (because there is no way to know whether they were // going be empty or not other than actually running linker scripts.) // We need to ignore remains of empty sections. - if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) - if (Sec->empty()) + if (auto *S = dyn_cast<SyntheticSection>(Sec)) + if (S->empty()) continue; - auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); - switchTo(IB->OutSec); - if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) - output(I); - else - flush(); + if (!Sec->Live) + continue; + assert(CurOutSec == Sec->OutSec || AlreadyOutputOS.count(Sec->OutSec)); + output(cast<InputSection>(Sec)); } } -template <class ELFT> -static std::vector<OutputSectionBase *> -findSections(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { - std::vector<OutputSectionBase *> Ret; - for (OutputSectionBase *Sec : Sections) - if (Sec->getName() == Name) - Ret.push_back(Sec); - return Ret; +static OutputSection * +findSection(StringRef Name, const std::vector<OutputSection *> &Sections) { + for (OutputSection *Sec : Sections) + if (Sec->Name == Name) + return Sec; + return nullptr; +} + +// This function searches for a memory region to place the given output +// section in. If found, a pointer to the appropriate memory region is +// returned. Otherwise, a nullptr is returned. +MemoryRegion *LinkerScript::findMemoryRegion(OutputSectionCommand *Cmd) { + // If a memory region name was specified in the output section command, + // then try to find that region first. 
+ if (!Cmd->MemoryRegionName.empty()) { + auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); + if (It != Opt.MemoryRegions.end()) + return &It->second; + error("memory region '" + Cmd->MemoryRegionName + "' not declared"); + return nullptr; + } + + // If at least one memory region is defined, all sections must + // belong to some memory region. Otherwise, we don't need to do + // anything for memory regions. + if (Opt.MemoryRegions.empty()) + return nullptr; + + OutputSection *Sec = Cmd->Sec; + // See if a region can be found by matching section flags. + for (auto &Pair : Opt.MemoryRegions) { + MemoryRegion &M = Pair.second; + if ((M.Flags & Sec->Flags) && (M.NegFlags & Sec->Flags) == 0) + return &M; + } + + // Otherwise, no suitable region was found. + if (Sec->Flags & SHF_ALLOC) + error("no memory region specified for section '" + Sec->Name + "'"); + return nullptr; } // This function assigns offsets to input sections and an output section // for a single sections command (e.g. ".text { *(.text); }"). -template <class ELFT> -void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { - if (Cmd->LMAExpr) - LMAOffset = Cmd->LMAExpr(Dot) - Dot; - std::vector<OutputSectionBase *> Sections = - findSections<ELFT>(Cmd->Name, *OutputSections); - if (Sections.empty()) +void LinkerScript::assignOffsets(OutputSectionCommand *Cmd) { + OutputSection *Sec = Cmd->Sec; + if (!Sec) return; - switchTo(Sections[0]); - - // Find the last section output location. We will output orphan sections - // there so that end symbols point to the correct location. 
- auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), - [](const std::unique_ptr<BaseCommand> &Cmd) { - return !isa<SymbolAssignment>(*Cmd); - }) - .base(); - for (auto I = Cmd->Commands.begin(); I != E; ++I) + + if (Cmd->AddrExpr && (Sec->Flags & SHF_ALLOC)) + setDot(Cmd->AddrExpr, Cmd->Location, false); + + if (Cmd->LMAExpr) { + uint64_t D = Dot; + LMAOffset = [=] { return Cmd->LMAExpr().getValue() - D; }; + } + + CurMemRegion = Cmd->MemRegion; + if (CurMemRegion) + Dot = CurMemRegion->Offset; + switchTo(Sec); + + // flush() may add orphan sections, so the order of flush() and + // symbol assignments is important. We want to call flush() first so + // that symbols pointing the end of the current section points to + // the location after orphan sections. + auto Mid = + std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), + [](BaseCommand *Cmd) { return !isa<SymbolAssignment>(Cmd); }) + .base(); + for (auto I = Cmd->Commands.begin(); I != Mid; ++I) process(**I); - for (OutputSectionBase *Base : Sections) - switchTo(Base); flush(); - std::for_each(E, Cmd->Commands.end(), - [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); + for (auto I = Mid, E = Cmd->Commands.end(); I != E; ++I) + process(**I); } -template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { +void LinkerScript::removeEmptyCommands() { // It is common practice to use very generic linker scripts. So for any // given run some of the output sections in the script will be empty. // We could create corresponding empty output sections, but that would @@ -544,52 +611,61 @@ template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { // We instead remove trivially empty sections. The bfd linker seems even // more aggressive at removing them. 
auto Pos = std::remove_if( - Opt.Commands.begin(), Opt.Commands.end(), - [&](const std::unique_ptr<BaseCommand> &Base) { - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) - return findSections<ELFT>(Cmd->Name, *OutputSections).empty(); + Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) { + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) + return !Cmd->Sec; return false; }); Opt.Commands.erase(Pos, Opt.Commands.end()); } static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { - for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) - if (!isa<InputSectionDescription>(*I)) + for (BaseCommand *Base : Cmd.Commands) + if (!isa<InputSectionDescription>(*Base)) return false; return true; } -template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { +void LinkerScript::adjustSectionsBeforeSorting() { // If the output section contains only symbol assignments, create a // corresponding output section. The bfd linker seems to only create them if // '.' is assigned to, but creating these section should not have any bad // consequeces and gives us a section to put the symbol in. 
- uintX_t Flags = SHF_ALLOC; - uint32_t Type = SHT_NOBITS; - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + uint64_t Flags = SHF_ALLOC; + uint32_t Type = SHT_PROGBITS; + for (BaseCommand *Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base); if (!Cmd) continue; - std::vector<OutputSectionBase *> Secs = - findSections<ELFT>(Cmd->Name, *OutputSections); - if (!Secs.empty()) { - Flags = Secs[0]->Flags; - Type = Secs[0]->Type; + if (OutputSection *Sec = findSection(Cmd->Name, *OutputSections)) { + Cmd->Sec = Sec; + Flags = Sec->Flags; + Type = Sec->Type; continue; } if (isAllSectionDescription(*Cmd)) continue; - auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); + auto *OutSec = make<OutputSection>(Cmd->Name, Type, Flags); OutputSections->push_back(OutSec); + Cmd->Sec = OutSec; } } -template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { +void LinkerScript::adjustSectionsAfterSorting() { placeOrphanSections(); + // Try and find an appropriate memory region to assign offsets in. + for (BaseCommand *Base : Opt.Commands) { + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) { + Cmd->MemRegion = findMemoryRegion(Cmd); + // Handle align (e.g. ".foo : ALIGN(16) { ... }"). + if (Cmd->AlignExpr) + Cmd->Sec->updateAlignment(Cmd->AlignExpr().getValue()); + } + } + // If output section command doesn't specify any segments, // and we haven't previously assigned any section to segment, // then we simply assign section to the very first load segment. @@ -605,10 +681,11 @@ template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { // Walk the commands and propagate the program headers to commands that don't // explicitly specify them. 
- for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + for (BaseCommand *Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base); if (!Cmd) continue; + if (Cmd->Phdrs.empty()) Cmd->Phdrs = DefPhdrs; else @@ -632,19 +709,37 @@ template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { // /* The RW PT_LOAD starts here*/ // rw_sec : { *(rw_sec) } // would mean that the RW PT_LOAD would become unaligned. -static bool shouldSkip(const BaseCommand &Cmd) { +static bool shouldSkip(BaseCommand *Cmd) { if (isa<OutputSectionCommand>(Cmd)) return false; - const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); - if (!Assign) - return true; - return Assign->Name != "."; + if (auto *Assign = dyn_cast<SymbolAssignment>(Cmd)) + return Assign->Name != "."; + return true; } -// Orphan sections are sections present in the input files which are not -// explicitly placed into the output file by the linker script. This just -// places them in the order already decided in OutputSections. -template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { +// Orphan sections are sections present in the input files which are +// not explicitly placed into the output file by the linker script. +// +// When the control reaches this function, Opt.Commands contains +// output section commands for non-orphan sections only. This function +// adds new elements for orphan sections so that all sections are +// explicitly handled by Opt.Commands. +// +// Writer<ELFT>::sortSections has already sorted output sections. +// What we need to do is to scan OutputSections vector and +// Opt.Commands in parallel to find orphan sections. If there is an +// output section that doesn't have a corresponding entry in +// Opt.Commands, we will insert a new entry to Opt.Commands. 
+// +// There is some ambiguity as to where exactly a new entry should be +// inserted, because Opt.Commands contains not only output section +// commands but also other types of commands such as symbol assignment +// expressions. There's no correct answer here due to the lack of the +// formal specification of the linker script. We use heuristics to +// determine whether a new output command should be added before or +// after another commands. For the details, look at shouldSkip +// function. +void LinkerScript::placeOrphanSections() { // The OutputSections are already in the correct order. // This loops creates or moves commands as needed so that they are in the // correct order. @@ -656,40 +751,33 @@ template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { // after that. auto FirstSectionOrDotAssignment = std::find_if(Opt.Commands.begin(), Opt.Commands.end(), - [](const std::unique_ptr<BaseCommand> &Cmd) { - if (isa<OutputSectionCommand>(*Cmd)) - return true; - const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); - if (!Assign) - return false; - return Assign->Name == "."; - }); + [](BaseCommand *Cmd) { return !shouldSkip(Cmd); }); if (FirstSectionOrDotAssignment != Opt.Commands.end()) { CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) ++CmdIndex; } - for (OutputSectionBase *Sec : *OutputSections) { - StringRef Name = Sec->getName(); + for (OutputSection *Sec : *OutputSections) { + StringRef Name = Sec->Name; // Find the last spot where we can insert a command and still get the // correct result. 
auto CmdIter = Opt.Commands.begin() + CmdIndex; auto E = Opt.Commands.end(); - while (CmdIter != E && shouldSkip(**CmdIter)) { + while (CmdIter != E && shouldSkip(*CmdIter)) { ++CmdIter; ++CmdIndex; } - auto Pos = - std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); - return Cmd && Cmd->Name == Name; - }); + auto Pos = std::find_if(CmdIter, E, [&](BaseCommand *Base) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base); + return Cmd && Cmd->Name == Name; + }); if (Pos == E) { - Opt.Commands.insert(CmdIter, - llvm::make_unique<OutputSectionCommand>(Name)); + auto *Cmd = make<OutputSectionCommand>(Name); + Cmd->Sec = Sec; + Opt.Commands.insert(CmdIter, Cmd); ++CmdIndex; continue; } @@ -699,55 +787,49 @@ template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { } } -template <class ELFT> -void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { +void LinkerScript::processNonSectionCommands() { + for (BaseCommand *Base : Opt.Commands) { + if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) + assignSymbol(Cmd, false); + else if (auto *Cmd = dyn_cast<AssertCommand>(Base)) + Cmd->Expression(); + } +} + +void LinkerScript::assignAddresses(std::vector<PhdrEntry> &Phdrs) { // Assign addresses as instructed by linker script SECTIONS sub-commands. 
Dot = 0; + ErrorOnMissingSection = true; + switchTo(Aether); - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { - if (Cmd->Name == ".") { - Dot = Cmd->Expression(Dot); - } else if (Cmd->Sym) { - assignSectionSymbol<ELFT>(Cmd, Dot); - } + for (BaseCommand *Base : Opt.Commands) { + if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) { + assignSymbol(Cmd, false); continue; } - if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { - Cmd->Expression(Dot); + if (auto *Cmd = dyn_cast<AssertCommand>(Base)) { + Cmd->Expression(); continue; } - auto *Cmd = cast<OutputSectionCommand>(Base.get()); - if (Cmd->AddrExpr) - Dot = Cmd->AddrExpr(Dot); + auto *Cmd = cast<OutputSectionCommand>(Base); assignOffsets(Cmd); } - uintX_t MinVA = std::numeric_limits<uintX_t>::max(); - for (OutputSectionBase *Sec : *OutputSections) { + uint64_t MinVA = std::numeric_limits<uint64_t>::max(); + for (OutputSection *Sec : *OutputSections) { if (Sec->Flags & SHF_ALLOC) MinVA = std::min<uint64_t>(MinVA, Sec->Addr); else Sec->Addr = 0; } - uintX_t HeaderSize = getHeaderSize(); - // If the linker script doesn't have PHDRS, add ElfHeader and ProgramHeaders - // now that we know we have space. - if (HeaderSize <= MinVA && !hasPhdrsCommands()) - allocateHeaders<ELFT>(Phdrs, *OutputSections); - - // ELF and Program headers need to be right before the first section in - // memory. Set their addresses accordingly. - MinVA = alignDown(MinVA - HeaderSize, Config->MaxPageSize); - Out<ELFT>::ElfHeader->Addr = MinVA; - Out<ELFT>::ProgramHeaders->Addr = Out<ELFT>::ElfHeader->Size + MinVA; + allocateHeaders(Phdrs, *OutputSections, MinVA); } // Creates program headers as instructed by PHDRS linker script command. 
-template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { +std::vector<PhdrEntry> LinkerScript::createPhdrs() { std::vector<PhdrEntry> Ret; // Process PHDRS and FILEHDR keywords because they are not @@ -757,23 +839,23 @@ template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { PhdrEntry &Phdr = Ret.back(); if (Cmd.HasFilehdr) - Phdr.add(Out<ELFT>::ElfHeader); + Phdr.add(Out::ElfHeader); if (Cmd.HasPhdrs) - Phdr.add(Out<ELFT>::ProgramHeaders); + Phdr.add(Out::ProgramHeaders); if (Cmd.LMAExpr) { - Phdr.p_paddr = Cmd.LMAExpr(0); + Phdr.p_paddr = Cmd.LMAExpr().getValue(); Phdr.HasLMA = true; } } // Add output sections to program headers. - for (OutputSectionBase *Sec : *OutputSections) { + for (OutputSection *Sec : *OutputSections) { if (!(Sec->Flags & SHF_ALLOC)) break; // Assign headers specified by linker script - for (size_t Id : getPhdrIndices(Sec->getName())) { + for (size_t Id : getPhdrIndices(Sec->Name)) { Ret[Id].add(Sec); if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) Ret[Id].p_flags |= Sec->getPhdrFlags(); @@ -782,60 +864,52 @@ template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { return Ret; } -template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { +bool LinkerScript::ignoreInterpSection() { // Ignore .interp section in case we have PHDRS specification // and PT_INTERP isn't listed. 
- return !Opt.PhdrsCommands.empty() && - llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { - return Cmd.Type == PT_INTERP; - }) == Opt.PhdrsCommands.end(); + if (Opt.PhdrsCommands.empty()) + return false; + for (PhdrsCommand &Cmd : Opt.PhdrsCommands) + if (Cmd.Type == PT_INTERP) + return false; + return true; } -template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) +Optional<uint32_t> LinkerScript::getFiller(StringRef Name) { + for (BaseCommand *Base : Opt.Commands) + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) if (Cmd->Name == Name) return Cmd->Filler; - return 0; + return None; } -template <class ELFT> static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { - const endianness E = ELFT::TargetEndianness; - - switch (Size) { - case 1: - *Buf = (uint8_t)Data; - break; - case 2: - write16<E>(Buf, Data); - break; - case 4: - write32<E>(Buf, Data); - break; - case 8: - write64<E>(Buf, Data); - break; - default: + if (Size == 1) + *Buf = Data; + else if (Size == 2) + write16(Buf, Data, Config->Endianness); + else if (Size == 4) + write32(Buf, Data, Config->Endianness); + else if (Size == 8) + write64(Buf, Data, Config->Endianness); + else llvm_unreachable("unsupported Size argument"); - } } -template <class ELFT> -void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { +void LinkerScript::writeDataBytes(StringRef Name, uint8_t *Buf) { int I = getSectionIndex(Name); if (I == INT_MAX) return; - auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); - for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) - if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) - writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); + auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I]); + for (BaseCommand *Base : Cmd->Commands) + if (auto *Data = 
dyn_cast<BytesDataCommand>(Base)) + writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size); } -template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) +bool LinkerScript::hasLMA(StringRef Name) { + for (BaseCommand *Base : Opt.Commands) + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) if (Cmd->LMAExpr && Cmd->Name == Name) return true; return false; @@ -845,89 +919,35 @@ template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { // SECTIONS commands. Sections are laid out as the same order as they // were in the script. If a given name did not appear in the script, // it returns INT_MAX, so that it will be laid out at end of file. -template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { +int LinkerScript::getSectionIndex(StringRef Name) { for (int I = 0, E = Opt.Commands.size(); I != E; ++I) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I])) if (Cmd->Name == Name) return I; return INT_MAX; } -template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { - return !Opt.PhdrsCommands.empty(); -} - -template <class ELFT> -const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, - StringRef Name) { - static OutputSectionBase FakeSec("", 0, 0); - - for (OutputSectionBase *Sec : *OutputSections) - if (Sec->getName() == Name) - return Sec; - - error(Loc + ": undefined section " + Name); - return &FakeSec; -} - -// This function is essentially the same as getOutputSection(Name)->Size, -// but it won't print out an error message if a given section is not found. -// -// Linker script does not create an output section if its content is empty. -// We want to allow SIZEOF(.foo) where .foo is a section which happened to -// be empty. 
That is why this function is different from getOutputSection(). -template <class ELFT> -uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { - for (OutputSectionBase *Sec : *OutputSections) - if (Sec->getName() == Name) - return Sec->Size; - return 0; -} - -template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { - return elf::getHeaderSize<ELFT>(); -} - -template <class ELFT> -uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { - if (SymbolBody *B = Symtab<ELFT>::X->find(S)) - return B->getVA<ELFT>(); +ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) { + if (S == ".") + return {CurOutSec, Dot - CurOutSec->Addr}; + if (SymbolBody *B = findSymbol(S)) { + if (auto *D = dyn_cast<DefinedRegular>(B)) + return {D->Section, D->Value}; + if (auto *C = dyn_cast<DefinedCommon>(B)) + return {InX::Common, C->Offset}; + } error(Loc + ": symbol not found: " + S); return 0; } -template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { - return Symtab<ELFT>::X->find(S) != nullptr; -} - -template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { - SymbolBody *Sym = Symtab<ELFT>::X->find(S); - auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); - return DR && !DR->Section; -} - -// Gets section symbol belongs to. Symbol "." doesn't belong to any -// specific section but isn't absolute at the same time, so we try -// to find suitable section for it as well. -template <class ELFT> -const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { - SymbolBody *Sym = Symtab<ELFT>::X->find(S); - if (!Sym) { - if (OutputSections->empty()) - return nullptr; - return CurOutSec ? CurOutSec : (*OutputSections)[0]; - } - - return SymbolTableSection<ELFT>::getOutputSection(Sym); -} +bool LinkerScript::isDefined(StringRef S) { return findSymbol(S) != nullptr; } // Returns indices of ELF headers containing specific section, identified // by Name. 
Each index is a zero based number of ELF header listed within // PHDRS {} script block. -template <class ELFT> -std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); +std::vector<size_t> LinkerScript::getPhdrIndices(StringRef SectionName) { + for (BaseCommand *Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base); if (!Cmd || Cmd->Name != SectionName) continue; @@ -939,8 +959,7 @@ std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { return {}; } -template <class ELFT> -size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { +size_t LinkerScript::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { size_t I = 0; for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { if (Cmd.Name == PhdrName) @@ -950,1009 +969,3 @@ size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); return 0; } - -class elf::ScriptParser final : public ScriptParserBase { - typedef void (ScriptParser::*Handler)(); - -public: - ScriptParser(MemoryBufferRef MB) - : ScriptParserBase(MB), - IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} - - void readLinkerScript(); - void readVersionScript(); - void readDynamicList(); - -private: - void addFile(StringRef Path); - - void readAsNeeded(); - void readEntry(); - void readExtern(); - void readGroup(); - void readInclude(); - void readOutput(); - void readOutputArch(); - void readOutputFormat(); - void readPhdrs(); - void readSearchDir(); - void readSections(); - void readVersion(); - void readVersionScriptCommand(); - - SymbolAssignment *readAssignment(StringRef Name); - BytesDataCommand *readBytesDataCommand(StringRef Tok); - uint32_t readFill(); - OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); - uint32_t 
readOutputSectionFiller(StringRef Tok); - std::vector<StringRef> readOutputSectionPhdrs(); - InputSectionDescription *readInputSectionDescription(StringRef Tok); - StringMatcher readFilePatterns(); - std::vector<SectionPattern> readInputSectionsList(); - InputSectionDescription *readInputSectionRules(StringRef FilePattern); - unsigned readPhdrType(); - SortSectionPolicy readSortKind(); - SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); - SymbolAssignment *readProvideOrAssignment(StringRef Tok); - void readSort(); - Expr readAssert(); - - Expr readExpr(); - Expr readExpr1(Expr Lhs, int MinPrec); - StringRef readParenLiteral(); - Expr readPrimary(); - Expr readTernary(Expr Cond); - Expr readParenExpr(); - - // For parsing version script. - std::vector<SymbolVersion> readVersionExtern(); - void readAnonymousDeclaration(); - void readVersionDeclaration(StringRef VerStr); - std::vector<SymbolVersion> readSymbols(); - void readLocals(); - - ScriptConfiguration &Opt = *ScriptConfig; - bool IsUnderSysroot; -}; - -void ScriptParser::readDynamicList() { - expect("{"); - readAnonymousDeclaration(); - if (!atEOF()) - setError("EOF expected, but got " + next()); -} - -void ScriptParser::readVersionScript() { - readVersionScriptCommand(); - if (!atEOF()) - setError("EOF expected, but got " + next()); -} - -void ScriptParser::readVersionScriptCommand() { - if (consume("{")) { - readAnonymousDeclaration(); - return; - } - - while (!atEOF() && !Error && peek() != "}") { - StringRef VerStr = next(); - if (VerStr == "{") { - setError("anonymous version definition is used in " - "combination with other version definitions"); - return; - } - expect("{"); - readVersionDeclaration(VerStr); - } -} - -void ScriptParser::readVersion() { - expect("{"); - readVersionScriptCommand(); - expect("}"); -} - -void ScriptParser::readLinkerScript() { - while (!atEOF()) { - StringRef Tok = next(); - if (Tok == ";") - continue; - - if (Tok == "ASSERT") { - Opt.Commands.emplace_back(new 
AssertCommand(readAssert())); - } else if (Tok == "ENTRY") { - readEntry(); - } else if (Tok == "EXTERN") { - readExtern(); - } else if (Tok == "GROUP" || Tok == "INPUT") { - readGroup(); - } else if (Tok == "INCLUDE") { - readInclude(); - } else if (Tok == "OUTPUT") { - readOutput(); - } else if (Tok == "OUTPUT_ARCH") { - readOutputArch(); - } else if (Tok == "OUTPUT_FORMAT") { - readOutputFormat(); - } else if (Tok == "PHDRS") { - readPhdrs(); - } else if (Tok == "SEARCH_DIR") { - readSearchDir(); - } else if (Tok == "SECTIONS") { - readSections(); - } else if (Tok == "VERSION") { - readVersion(); - } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { - Opt.Commands.emplace_back(Cmd); - } else { - setError("unknown directive: " + Tok); - } - } -} - -void ScriptParser::addFile(StringRef S) { - if (IsUnderSysroot && S.startswith("/")) { - SmallString<128> PathData; - StringRef Path = (Config->Sysroot + S).toStringRef(PathData); - if (sys::fs::exists(Path)) { - Driver->addFile(Saver.save(Path)); - return; - } - } - - if (sys::path::is_absolute(S)) { - Driver->addFile(S); - } else if (S.startswith("=")) { - if (Config->Sysroot.empty()) - Driver->addFile(S.substr(1)); - else - Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); - } else if (S.startswith("-l")) { - Driver->addLibrary(S.substr(2)); - } else if (sys::fs::exists(S)) { - Driver->addFile(S); - } else { - if (Optional<std::string> Path = findFromSearchPaths(S)) - Driver->addFile(Saver.save(*Path)); - else - setError("unable to find " + S); - } -} - -void ScriptParser::readAsNeeded() { - expect("("); - bool Orig = Config->AsNeeded; - Config->AsNeeded = true; - while (!Error && !consume(")")) - addFile(unquote(next())); - Config->AsNeeded = Orig; -} - -void ScriptParser::readEntry() { - // -e <symbol> takes predecence over ENTRY(<symbol>). 
- expect("("); - StringRef Tok = next(); - if (Config->Entry.empty()) - Config->Entry = Tok; - expect(")"); -} - -void ScriptParser::readExtern() { - expect("("); - while (!Error && !consume(")")) - Config->Undefined.push_back(next()); -} - -void ScriptParser::readGroup() { - expect("("); - while (!Error && !consume(")")) { - StringRef Tok = next(); - if (Tok == "AS_NEEDED") - readAsNeeded(); - else - addFile(unquote(Tok)); - } -} - -void ScriptParser::readInclude() { - StringRef Tok = unquote(next()); - - // https://sourceware.org/binutils/docs/ld/File-Commands.html: - // The file will be searched for in the current directory, and in any - // directory specified with the -L option. - if (sys::fs::exists(Tok)) { - if (Optional<MemoryBufferRef> MB = readFile(Tok)) - tokenize(*MB); - return; - } - if (Optional<std::string> Path = findFromSearchPaths(Tok)) { - if (Optional<MemoryBufferRef> MB = readFile(*Path)) - tokenize(*MB); - return; - } - setError("cannot open " + Tok); -} - -void ScriptParser::readOutput() { - // -o <file> takes predecence over OUTPUT(<file>). - expect("("); - StringRef Tok = next(); - if (Config->OutputFile.empty()) - Config->OutputFile = unquote(Tok); - expect(")"); -} - -void ScriptParser::readOutputArch() { - // Error checking only for now. - expect("("); - skip(); - expect(")"); -} - -void ScriptParser::readOutputFormat() { - // Error checking only for now. 
- expect("("); - skip(); - StringRef Tok = next(); - if (Tok == ")") - return; - if (Tok != ",") { - setError("unexpected token: " + Tok); - return; - } - skip(); - expect(","); - skip(); - expect(")"); -} - -void ScriptParser::readPhdrs() { - expect("{"); - while (!Error && !consume("}")) { - StringRef Tok = next(); - Opt.PhdrsCommands.push_back( - {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); - PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); - - PhdrCmd.Type = readPhdrType(); - do { - Tok = next(); - if (Tok == ";") - break; - if (Tok == "FILEHDR") - PhdrCmd.HasFilehdr = true; - else if (Tok == "PHDRS") - PhdrCmd.HasPhdrs = true; - else if (Tok == "AT") - PhdrCmd.LMAExpr = readParenExpr(); - else if (Tok == "FLAGS") { - expect("("); - // Passing 0 for the value of dot is a bit of a hack. It means that - // we accept expressions like ".|1". - PhdrCmd.Flags = readExpr()(0); - expect(")"); - } else - setError("unexpected header attribute: " + Tok); - } while (!Error); - } -} - -void ScriptParser::readSearchDir() { - expect("("); - StringRef Tok = next(); - if (!Config->Nostdlib) - Config->SearchPaths.push_back(unquote(Tok)); - expect(")"); -} - -void ScriptParser::readSections() { - Opt.HasSections = true; - // -no-rosegment is used to avoid placing read only non-executable sections in - // their own segment. We do the same if SECTIONS command is present in linker - // script. See comment for computeFlags(). 
- Config->SingleRoRx = true; - - expect("{"); - while (!Error && !consume("}")) { - StringRef Tok = next(); - BaseCommand *Cmd = readProvideOrAssignment(Tok); - if (!Cmd) { - if (Tok == "ASSERT") - Cmd = new AssertCommand(readAssert()); - else - Cmd = readOutputSectionDescription(Tok); - } - Opt.Commands.emplace_back(Cmd); - } -} - -static int precedence(StringRef Op) { - return StringSwitch<int>(Op) - .Cases("*", "/", 5) - .Cases("+", "-", 4) - .Cases("<<", ">>", 3) - .Cases("<", "<=", ">", ">=", "==", "!=", 2) - .Cases("&", "|", 1) - .Default(-1); -} - -StringMatcher ScriptParser::readFilePatterns() { - std::vector<StringRef> V; - while (!Error && !consume(")")) - V.push_back(next()); - return StringMatcher(V); -} - -SortSectionPolicy ScriptParser::readSortKind() { - if (consume("SORT") || consume("SORT_BY_NAME")) - return SortSectionPolicy::Name; - if (consume("SORT_BY_ALIGNMENT")) - return SortSectionPolicy::Alignment; - if (consume("SORT_BY_INIT_PRIORITY")) - return SortSectionPolicy::Priority; - if (consume("SORT_NONE")) - return SortSectionPolicy::None; - return SortSectionPolicy::Default; -} - -// Method reads a list of sequence of excluded files and section globs given in -// a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ -// Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) -// The semantics of that is next: -// * Include .foo.1 from every file. 
-// * Include .foo.2 from every file but a.o -// * Include .foo.3 from every file but b.o -std::vector<SectionPattern> ScriptParser::readInputSectionsList() { - std::vector<SectionPattern> Ret; - while (!Error && peek() != ")") { - StringMatcher ExcludeFilePat; - if (consume("EXCLUDE_FILE")) { - expect("("); - ExcludeFilePat = readFilePatterns(); - } - - std::vector<StringRef> V; - while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") - V.push_back(next()); - - if (!V.empty()) - Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); - else - setError("section pattern is expected"); - } - return Ret; -} - -// Reads contents of "SECTIONS" directive. That directive contains a -// list of glob patterns for input sections. The grammar is as follows. -// -// <patterns> ::= <section-list> -// | <sort> "(" <section-list> ")" -// | <sort> "(" <sort> "(" <section-list> ")" ")" -// -// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" -// | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" -// -// <section-list> is parsed by readInputSectionsList(). 
-InputSectionDescription * -ScriptParser::readInputSectionRules(StringRef FilePattern) { - auto *Cmd = new InputSectionDescription(FilePattern); - expect("("); - while (!Error && !consume(")")) { - SortSectionPolicy Outer = readSortKind(); - SortSectionPolicy Inner = SortSectionPolicy::Default; - std::vector<SectionPattern> V; - if (Outer != SortSectionPolicy::Default) { - expect("("); - Inner = readSortKind(); - if (Inner != SortSectionPolicy::Default) { - expect("("); - V = readInputSectionsList(); - expect(")"); - } else { - V = readInputSectionsList(); - } - expect(")"); - } else { - V = readInputSectionsList(); - } - - for (SectionPattern &Pat : V) { - Pat.SortInner = Inner; - Pat.SortOuter = Outer; - } - - std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); - } - return Cmd; -} - -InputSectionDescription * -ScriptParser::readInputSectionDescription(StringRef Tok) { - // Input section wildcard can be surrounded by KEEP. - // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep - if (Tok == "KEEP") { - expect("("); - StringRef FilePattern = next(); - InputSectionDescription *Cmd = readInputSectionRules(FilePattern); - expect(")"); - Opt.KeptSections.push_back(Cmd); - return Cmd; - } - return readInputSectionRules(Tok); -} - -void ScriptParser::readSort() { - expect("("); - expect("CONSTRUCTORS"); - expect(")"); -} - -Expr ScriptParser::readAssert() { - expect("("); - Expr E = readExpr(); - expect(","); - StringRef Msg = unquote(next()); - expect(")"); - return [=](uint64_t Dot) { - uint64_t V = E(Dot); - if (!V) - error(Msg); - return V; - }; -} - -// Reads a FILL(expr) command. We handle the FILL command as an -// alias for =fillexp section attribute, which is different from -// what GNU linkers do. 
-// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html -uint32_t ScriptParser::readFill() { - expect("("); - uint32_t V = readOutputSectionFiller(next()); - expect(")"); - expect(";"); - return V; -} - -OutputSectionCommand * -ScriptParser::readOutputSectionDescription(StringRef OutSec) { - OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); - Cmd->Location = getCurrentLocation(); - - // Read an address expression. - // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address - if (peek() != ":") - Cmd->AddrExpr = readExpr(); - - expect(":"); - - if (consume("AT")) - Cmd->LMAExpr = readParenExpr(); - if (consume("ALIGN")) - Cmd->AlignExpr = readParenExpr(); - if (consume("SUBALIGN")) - Cmd->SubalignExpr = readParenExpr(); - - // Parse constraints. - if (consume("ONLY_IF_RO")) - Cmd->Constraint = ConstraintKind::ReadOnly; - if (consume("ONLY_IF_RW")) - Cmd->Constraint = ConstraintKind::ReadWrite; - expect("{"); - - while (!Error && !consume("}")) { - StringRef Tok = next(); - if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { - Cmd->Commands.emplace_back(Assignment); - } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { - Cmd->Commands.emplace_back(Data); - } else if (Tok == "ASSERT") { - Cmd->Commands.emplace_back(new AssertCommand(readAssert())); - expect(";"); - } else if (Tok == "FILL") { - Cmd->Filler = readFill(); - } else if (Tok == "SORT") { - readSort(); - } else if (peek() == "(") { - Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); - } else { - setError("unknown command " + Tok); - } - } - Cmd->Phdrs = readOutputSectionPhdrs(); - - if (consume("=")) - Cmd->Filler = readOutputSectionFiller(next()); - else if (peek().startswith("=")) - Cmd->Filler = readOutputSectionFiller(next().drop_front()); - - return Cmd; -} - -// Read "=<number>" where <number> is an octal/decimal/hexadecimal number. 
-// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html -// -// ld.gold is not fully compatible with ld.bfd. ld.bfd handles -// hexstrings as blobs of arbitrary sizes, while ld.gold handles them -// as 32-bit big-endian values. We will do the same as ld.gold does -// because it's simpler than what ld.bfd does. -uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { - uint32_t V; - if (!Tok.getAsInteger(0, V)) - return V; - setError("invalid filler expression: " + Tok); - return 0; -} - -SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { - expect("("); - SymbolAssignment *Cmd = readAssignment(next()); - Cmd->Provide = Provide; - Cmd->Hidden = Hidden; - expect(")"); - expect(";"); - return Cmd; -} - -SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { - SymbolAssignment *Cmd = nullptr; - if (peek() == "=" || peek() == "+=") { - Cmd = readAssignment(Tok); - expect(";"); - } else if (Tok == "PROVIDE") { - Cmd = readProvideHidden(true, false); - } else if (Tok == "HIDDEN") { - Cmd = readProvideHidden(false, true); - } else if (Tok == "PROVIDE_HIDDEN") { - Cmd = readProvideHidden(true, true); - } - return Cmd; -} - -static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { - if (S == ".") - return Dot; - return ScriptBase->getSymbolValue(Loc, S); -} - -static bool isAbsolute(StringRef S) { - if (S == ".") - return false; - return ScriptBase->isAbsolute(S); -} - -SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { - StringRef Op = next(); - Expr E; - assert(Op == "=" || Op == "+="); - if (consume("ABSOLUTE")) { - // The RHS may be something like "ABSOLUTE(.) & 0xff". - // Call readExpr1 to read the whole expression. 
- E = readExpr1(readParenExpr(), 0); - E.IsAbsolute = [] { return true; }; - } else { - E = readExpr(); - } - if (Op == "+=") { - std::string Loc = getCurrentLocation(); - E = [=](uint64_t Dot) { - return getSymbolValue(Loc, Name, Dot) + E(Dot); - }; - } - return new SymbolAssignment(Name, E); -} - -// This is an operator-precedence parser to parse a linker -// script expression. -Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } - -static Expr combine(StringRef Op, Expr L, Expr R) { - if (Op == "*") - return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; - if (Op == "/") { - return [=](uint64_t Dot) -> uint64_t { - uint64_t RHS = R(Dot); - if (RHS == 0) { - error("division by zero"); - return 0; - } - return L(Dot) / RHS; - }; - } - if (Op == "+") - return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, - [=] { return L.IsAbsolute() && R.IsAbsolute(); }, - [=] { - const OutputSectionBase *S = L.Section(); - return S ? S : R.Section(); - }}; - if (Op == "-") - return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; - if (Op == "<<") - return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; - if (Op == ">>") - return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; - if (Op == "<") - return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; - if (Op == ">") - return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; - if (Op == ">=") - return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; - if (Op == "<=") - return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; - if (Op == "==") - return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; - if (Op == "!=") - return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; - if (Op == "&") - return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; - if (Op == "|") - return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; - llvm_unreachable("invalid operator"); -} - -// This is a part of the operator-precedence parser. This function -// assumes that the remaining token stream starts with an operator. 
-Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { - while (!atEOF() && !Error) { - // Read an operator and an expression. - if (consume("?")) - return readTernary(Lhs); - StringRef Op1 = peek(); - if (precedence(Op1) < MinPrec) - break; - skip(); - Expr Rhs = readPrimary(); - - // Evaluate the remaining part of the expression first if the - // next operator has greater precedence than the previous one. - // For example, if we have read "+" and "3", and if the next - // operator is "*", then we'll evaluate 3 * ... part first. - while (!atEOF()) { - StringRef Op2 = peek(); - if (precedence(Op2) <= precedence(Op1)) - break; - Rhs = readExpr1(Rhs, precedence(Op2)); - } - - Lhs = combine(Op1, Lhs, Rhs); - } - return Lhs; -} - -uint64_t static getConstant(StringRef S) { - if (S == "COMMONPAGESIZE") - return Target->PageSize; - if (S == "MAXPAGESIZE") - return Config->MaxPageSize; - error("unknown constant: " + S); - return 0; -} - -// Parses Tok as an integer. Returns true if successful. -// It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") -// and decimal numbers. Decimal numbers may have "K" (kilo) or -// "M" (mega) prefixes. 
-static bool readInteger(StringRef Tok, uint64_t &Result) { - // Negative number - if (Tok.startswith("-")) { - if (!readInteger(Tok.substr(1), Result)) - return false; - Result = -Result; - return true; - } - - // Hexadecimal - if (Tok.startswith_lower("0x")) - return !Tok.substr(2).getAsInteger(16, Result); - if (Tok.endswith_lower("H")) - return !Tok.drop_back().getAsInteger(16, Result); - - // Decimal - int Suffix = 1; - if (Tok.endswith_lower("K")) { - Suffix = 1024; - Tok = Tok.drop_back(); - } else if (Tok.endswith_lower("M")) { - Suffix = 1024 * 1024; - Tok = Tok.drop_back(); - } - if (Tok.getAsInteger(10, Result)) - return false; - Result *= Suffix; - return true; -} - -BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { - int Size = StringSwitch<unsigned>(Tok) - .Case("BYTE", 1) - .Case("SHORT", 2) - .Case("LONG", 4) - .Case("QUAD", 8) - .Default(-1); - if (Size == -1) - return nullptr; - - return new BytesDataCommand(readParenExpr(), Size); -} - -StringRef ScriptParser::readParenLiteral() { - expect("("); - StringRef Tok = next(); - expect(")"); - return Tok; -} - -Expr ScriptParser::readPrimary() { - if (peek() == "(") - return readParenExpr(); - - StringRef Tok = next(); - std::string Location = getCurrentLocation(); - - if (Tok == "~") { - Expr E = readPrimary(); - return [=](uint64_t Dot) { return ~E(Dot); }; - } - if (Tok == "-") { - Expr E = readPrimary(); - return [=](uint64_t Dot) { return -E(Dot); }; - } - - // Built-in functions are parsed here. - // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 
- if (Tok == "ADDR") { - StringRef Name = readParenLiteral(); - return {[=](uint64_t Dot) { - return ScriptBase->getOutputSection(Location, Name)->Addr; - }, - [=] { return false; }, - [=] { return ScriptBase->getOutputSection(Location, Name); }}; - } - if (Tok == "LOADADDR") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { - return ScriptBase->getOutputSection(Location, Name)->getLMA(); - }; - } - if (Tok == "ASSERT") - return readAssert(); - if (Tok == "ALIGN") { - expect("("); - Expr E = readExpr(); - if (consume(",")) { - Expr E2 = readExpr(); - expect(")"); - return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; - } - expect(")"); - return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; - } - if (Tok == "CONSTANT") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { return getConstant(Name); }; - } - if (Tok == "DEFINED") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; - } - if (Tok == "SEGMENT_START") { - expect("("); - skip(); - expect(","); - Expr E = readExpr(); - expect(")"); - return [=](uint64_t Dot) { return E(Dot); }; - } - if (Tok == "DATA_SEGMENT_ALIGN") { - expect("("); - Expr E = readExpr(); - expect(","); - readExpr(); - expect(")"); - return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; - } - if (Tok == "DATA_SEGMENT_END") { - expect("("); - expect("."); - expect(")"); - return [](uint64_t Dot) { return Dot; }; - } - // GNU linkers implements more complicated logic to handle - // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to - // the next page boundary for simplicity. 
- if (Tok == "DATA_SEGMENT_RELRO_END") { - expect("("); - readExpr(); - expect(","); - readExpr(); - expect(")"); - return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; - } - if (Tok == "SIZEOF") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; - } - if (Tok == "ALIGNOF") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { - return ScriptBase->getOutputSection(Location, Name)->Addralign; - }; - } - if (Tok == "SIZEOF_HEADERS") - return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; - - // Tok is a literal number. - uint64_t V; - if (readInteger(Tok, V)) - return [=](uint64_t Dot) { return V; }; - - // Tok is a symbol name. - if (Tok != "." && !isValidCIdentifier(Tok)) - setError("malformed number: " + Tok); - return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, - [=] { return isAbsolute(Tok); }, - [=] { return ScriptBase->getSymbolSection(Tok); }}; -} - -Expr ScriptParser::readTernary(Expr Cond) { - Expr L = readExpr(); - expect(":"); - Expr R = readExpr(); - return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; -} - -Expr ScriptParser::readParenExpr() { - expect("("); - Expr E = readExpr(); - expect(")"); - return E; -} - -std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { - std::vector<StringRef> Phdrs; - while (!Error && peek().startswith(":")) { - StringRef Tok = next(); - Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); - } - return Phdrs; -} - -// Read a program header type name. The next token must be a -// name of a program header type or a constant (e.g. "0x3"). 
-unsigned ScriptParser::readPhdrType() { - StringRef Tok = next(); - uint64_t Val; - if (readInteger(Tok, Val)) - return Val; - - unsigned Ret = StringSwitch<unsigned>(Tok) - .Case("PT_NULL", PT_NULL) - .Case("PT_LOAD", PT_LOAD) - .Case("PT_DYNAMIC", PT_DYNAMIC) - .Case("PT_INTERP", PT_INTERP) - .Case("PT_NOTE", PT_NOTE) - .Case("PT_SHLIB", PT_SHLIB) - .Case("PT_PHDR", PT_PHDR) - .Case("PT_TLS", PT_TLS) - .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) - .Case("PT_GNU_STACK", PT_GNU_STACK) - .Case("PT_GNU_RELRO", PT_GNU_RELRO) - .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) - .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) - .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) - .Default(-1); - - if (Ret == (unsigned)-1) { - setError("invalid program header type: " + Tok); - return PT_NULL; - } - return Ret; -} - -// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". -void ScriptParser::readAnonymousDeclaration() { - // Read global symbols first. "global:" is default, so if there's - // no label, we assume global symbols. - if (consume("global:") || peek() != "local:") - Config->VersionScriptGlobals = readSymbols(); - - readLocals(); - expect("}"); - expect(";"); -} - -void ScriptParser::readLocals() { - if (!consume("local:")) - return; - std::vector<SymbolVersion> Locals = readSymbols(); - for (SymbolVersion V : Locals) { - if (V.Name == "*") { - Config->DefaultSymbolVersion = VER_NDX_LOCAL; - continue; - } - Config->VersionScriptLocals.push_back(V); - } -} - -// Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". -void ScriptParser::readVersionDeclaration(StringRef VerStr) { - // Identifiers start at 2 because 0 and 1 are reserved - // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. - uint16_t VersionId = Config->VersionDefinitions.size() + 2; - Config->VersionDefinitions.push_back({VerStr, VersionId}); - - // Read global symbols. 
- if (consume("global:") || peek() != "local:") - Config->VersionDefinitions.back().Globals = readSymbols(); - - readLocals(); - expect("}"); - - // Each version may have a parent version. For example, "Ver2" - // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" - // as a parent. This version hierarchy is, probably against your - // instinct, purely for hint; the runtime doesn't care about it - // at all. In LLD, we simply ignore it. - if (peek() != ";") - skip(); - expect(";"); -} - -// Reads a list of symbols for a versions cript. -std::vector<SymbolVersion> ScriptParser::readSymbols() { - std::vector<SymbolVersion> Ret; - for (;;) { - if (consume("extern")) { - for (SymbolVersion V : readVersionExtern()) - Ret.push_back(V); - continue; - } - - if (peek() == "}" || peek() == "local:" || Error) - break; - StringRef Tok = next(); - Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); - expect(";"); - } - return Ret; -} - -// Reads an "extern C++" directive, e.g., -// "extern "C++" { ns::*; "f(int, double)"; };" -std::vector<SymbolVersion> ScriptParser::readVersionExtern() { - StringRef Tok = next(); - bool IsCXX = Tok == "\"C++\""; - if (!IsCXX && Tok != "\"C\"") - setError("Unknown language"); - expect("{"); - - std::vector<SymbolVersion> Ret; - while (!Error && peek() != "}") { - StringRef Tok = next(); - bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); - Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); - expect(";"); - } - - expect("}"); - expect(";"); - return Ret; -} - -void elf::readLinkerScript(MemoryBufferRef MB) { - ScriptParser(MB).readLinkerScript(); -} - -void elf::readVersionScript(MemoryBufferRef MB) { - ScriptParser(MB).readVersionScript(); -} - -void elf::readDynamicList(MemoryBufferRef MB) { - ScriptParser(MB).readDynamicList(); -} - -template class elf::LinkerScript<ELF32LE>; -template class elf::LinkerScript<ELF32BE>; -template class elf::LinkerScript<ELF64LE>; -template class elf::LinkerScript<ELF64BE>; diff --git 
a/ELF/LinkerScript.h b/ELF/LinkerScript.h index 505162f0ab43..04a388efb4e9 100644 --- a/ELF/LinkerScript.h +++ b/ELF/LinkerScript.h @@ -15,6 +15,7 @@ #include "Writer.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" @@ -28,45 +29,32 @@ namespace lld { namespace elf { class DefinedCommon; -class ScriptParser; class SymbolBody; -template <class ELFT> class InputSectionBase; -template <class ELFT> class InputSection; -class OutputSectionBase; -template <class ELFT> class OutputSectionFactory; -class InputSectionData; +class InputSectionBase; +class InputSection; +class OutputSection; +class OutputSectionFactory; +class InputSectionBase; +class SectionBase; + +struct ExprValue { + SectionBase *Sec; + uint64_t Val; + bool ForceAbsolute; + + ExprValue(SectionBase *Sec, bool ForceAbsolute, uint64_t Val) + : Sec(Sec), Val(Val), ForceAbsolute(ForceAbsolute) {} + ExprValue(SectionBase *Sec, uint64_t Val) : ExprValue(Sec, false, Val) {} + ExprValue(uint64_t Val) : ExprValue(nullptr, Val) {} + bool isAbsolute() const { return ForceAbsolute || Sec == nullptr; } + uint64_t getValue() const; + uint64_t getSecAddr() const; +}; // This represents an expression in the linker script. // ScriptParser::readExpr reads an expression and returns an Expr. -// Later, we evaluate the expression by calling the function -// with the value of special context variable ".". -struct Expr { - std::function<uint64_t(uint64_t)> Val; - std::function<bool()> IsAbsolute; - - // If expression is section-relative the function below is used - // to get the output section pointer. 
- std::function<const OutputSectionBase *()> Section; - - uint64_t operator()(uint64_t Dot) const { return Val(Dot); } - operator bool() const { return (bool)Val; } - - Expr(std::function<uint64_t(uint64_t)> Val, std::function<bool()> IsAbsolute, - std::function<const OutputSectionBase *()> Section) - : Val(Val), IsAbsolute(IsAbsolute), Section(Section) {} - template <typename T> - Expr(T V) : Expr(V, [] { return true; }, [] { return nullptr; }) {} - Expr() : Expr(nullptr) {} -}; - -// Parses a linker script. Calling this function updates -// Config and ScriptConfig. -void readLinkerScript(MemoryBufferRef MB); - -// Parses a version script. -void readVersionScript(MemoryBufferRef MB); - -void readDynamicList(MemoryBufferRef MB); +// Later, we evaluate the expression by calling the function. +typedef std::function<ExprValue()> Expr; // This enum is used to implement linker script SECTIONS command. // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS @@ -80,16 +68,13 @@ enum SectionsCommandKind { struct BaseCommand { BaseCommand(int K) : Kind(K) {} - - virtual ~BaseCommand() = default; - int Kind; }; // This represents ". = <expr>" or "<symbol> = <expr>". struct SymbolAssignment : BaseCommand { - SymbolAssignment(StringRef Name, Expr E) - : BaseCommand(AssignmentKind), Name(Name), Expression(E) {} + SymbolAssignment(StringRef Name, Expr E, std::string Loc) + : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {} static bool classof(const BaseCommand *C); @@ -103,6 +88,9 @@ struct SymbolAssignment : BaseCommand { // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. bool Provide = false; bool Hidden = false; + + // Holds file name and line number for error reporting. + std::string Location; }; // Linker scripts allow additional constraints to be put on ouput sections. @@ -111,22 +99,37 @@ struct SymbolAssignment : BaseCommand { // with ONLY_IF_RW is created if all input sections are RW. 
enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; +// This struct is used to represent the location and size of regions of +// target memory. Instances of the struct are created by parsing the +// MEMORY command. +struct MemoryRegion { + std::string Name; + uint64_t Origin; + uint64_t Length; + uint64_t Offset; + uint32_t Flags; + uint32_t NegFlags; +}; + struct OutputSectionCommand : BaseCommand { OutputSectionCommand(StringRef Name) : BaseCommand(OutputSectionKind), Name(Name) {} static bool classof(const BaseCommand *C); + OutputSection *Sec = nullptr; + MemoryRegion *MemRegion = nullptr; StringRef Name; Expr AddrExpr; Expr AlignExpr; Expr LMAExpr; Expr SubalignExpr; - std::vector<std::unique_ptr<BaseCommand>> Commands; + std::vector<BaseCommand *> Commands; std::vector<StringRef> Phdrs; - uint32_t Filler = 0; + llvm::Optional<uint32_t> Filler; ConstraintKind Constraint = ConstraintKind::NoConstraint; std::string Location; + std::string MemoryRegionName; }; // This struct represents one section match pattern in SECTIONS() command. @@ -154,7 +157,7 @@ struct InputSectionDescription : BaseCommand { // will be associated with this InputSectionDescription. std::vector<SectionPattern> SectionPatterns; - std::vector<InputSectionData *> Sections; + std::vector<InputSectionBase *> Sections; }; // Represents an ASSERT(). @@ -187,25 +190,10 @@ struct PhdrsCommand { Expr LMAExpr; }; -class LinkerScriptBase { -protected: - ~LinkerScriptBase() = default; - -public: - virtual uint64_t getHeaderSize() = 0; - virtual uint64_t getSymbolValue(const Twine &Loc, StringRef S) = 0; - virtual bool isDefined(StringRef S) = 0; - virtual bool isAbsolute(StringRef S) = 0; - virtual const OutputSectionBase *getSymbolSection(StringRef S) = 0; - virtual const OutputSectionBase *getOutputSection(const Twine &Loc, - StringRef S) = 0; - virtual uint64_t getOutputSectionSize(StringRef S) = 0; -}; - // ScriptConfiguration holds linker script parse results. 
struct ScriptConfiguration { // Used to assign addresses to sections. - std::vector<std::unique_ptr<BaseCommand>> Commands; + std::vector<BaseCommand *> Commands; // Used to assign sections to headers. std::vector<PhdrsCommand> PhdrsCommands; @@ -215,20 +203,60 @@ struct ScriptConfiguration { // List of section patterns specified with KEEP commands. They will // be kept even if they are unused and --gc-sections is specified. std::vector<InputSectionDescription *> KeptSections; + + // A map from memory region name to a memory region descriptor. + llvm::DenseMap<llvm::StringRef, MemoryRegion> MemoryRegions; + + // A list of symbols referenced by the script. + std::vector<llvm::StringRef> ReferencedSymbols; }; -extern ScriptConfiguration *ScriptConfig; +class LinkerScript { +protected: + void assignSymbol(SymbolAssignment *Cmd, bool InSec); + void setDot(Expr E, const Twine &Loc, bool InSec); + + std::vector<InputSectionBase *> + computeInputSections(const InputSectionDescription *); + + std::vector<InputSectionBase *> + createInputSectionList(OutputSectionCommand &Cmd); + + std::vector<size_t> getPhdrIndices(StringRef SectionName); + size_t getPhdrIndex(const Twine &Loc, StringRef PhdrName); + + MemoryRegion *findMemoryRegion(OutputSectionCommand *Cmd); + + void switchTo(OutputSection *Sec); + void flush(); + void output(InputSection *Sec); + void process(BaseCommand &Base); -// This is a runner of the linker script. 
-template <class ELFT> class LinkerScript final : public LinkerScriptBase { - typedef typename ELFT::uint uintX_t; + OutputSection *Aether; + bool ErrorOnMissingSection = false; + + uint64_t Dot; + uint64_t ThreadBssOffset = 0; + + std::function<uint64_t()> LMAOffset; + OutputSection *CurOutSec = nullptr; + MemoryRegion *CurMemRegion = nullptr; + + llvm::DenseSet<OutputSection *> AlreadyOutputOS; + llvm::DenseSet<InputSectionBase *> AlreadyOutputIS; public: - LinkerScript(); - ~LinkerScript(); + bool hasPhdrsCommands() { return !Opt.PhdrsCommands.empty(); } + uint64_t getDot() { return Dot; } + OutputSection *getOutputSection(const Twine &Loc, StringRef S); + uint64_t getOutputSectionSize(StringRef S); + void discard(ArrayRef<InputSectionBase *> V); - void processCommands(OutputSectionFactory<ELFT> &Factory); - void addOrphanSections(OutputSectionFactory<ELFT> &Factory); + ExprValue getSymbolValue(const Twine &Loc, StringRef S); + bool isDefined(StringRef S); + + std::vector<OutputSection *> *OutputSections; + void addOrphanSections(OutputSectionFactory &Factory); void removeEmptyCommands(); void adjustSectionsBeforeSorting(); void adjustSectionsAfterSorting(); @@ -236,61 +264,24 @@ public: std::vector<PhdrEntry> createPhdrs(); bool ignoreInterpSection(); - uint32_t getFiller(StringRef Name); - void writeDataBytes(StringRef Name, uint8_t *Buf); + llvm::Optional<uint32_t> getFiller(StringRef Name); bool hasLMA(StringRef Name); - bool shouldKeep(InputSectionBase<ELFT> *S); + bool shouldKeep(InputSectionBase *S); void assignOffsets(OutputSectionCommand *Cmd); void placeOrphanSections(); + void processNonSectionCommands(); void assignAddresses(std::vector<PhdrEntry> &Phdrs); - bool hasPhdrsCommands(); - uint64_t getHeaderSize() override; - uint64_t getSymbolValue(const Twine &Loc, StringRef S) override; - bool isDefined(StringRef S) override; - bool isAbsolute(StringRef S) override; - const OutputSectionBase *getSymbolSection(StringRef S) override; - const 
OutputSectionBase *getOutputSection(const Twine &Loc, - StringRef S) override; - uint64_t getOutputSectionSize(StringRef S) override; - - std::vector<OutputSectionBase *> *OutputSections; - int getSectionIndex(StringRef Name); -private: - void computeInputSections(InputSectionDescription *); - - void addSection(OutputSectionFactory<ELFT> &Factory, - InputSectionBase<ELFT> *Sec, StringRef Name); - void discard(ArrayRef<InputSectionBase<ELFT> *> V); - - std::vector<InputSectionBase<ELFT> *> - createInputSectionList(OutputSectionCommand &Cmd); - - // "ScriptConfig" is a bit too long, so define a short name for it. - ScriptConfiguration &Opt = *ScriptConfig; - - std::vector<size_t> getPhdrIndices(StringRef SectionName); - size_t getPhdrIndex(const Twine &Loc, StringRef PhdrName); + void writeDataBytes(StringRef Name, uint8_t *Buf); + void addSymbol(SymbolAssignment *Cmd); + void processCommands(OutputSectionFactory &Factory); - uintX_t Dot; - uintX_t LMAOffset = 0; - OutputSectionBase *CurOutSec = nullptr; - uintX_t ThreadBssOffset = 0; - void switchTo(OutputSectionBase *Sec); - void flush(); - void output(InputSection<ELFT> *Sec); - void process(BaseCommand &Base); - llvm::DenseSet<OutputSectionBase *> AlreadyOutputOS; - llvm::DenseSet<InputSectionData *> AlreadyOutputIS; + // Parsed linker script configurations are set to this struct. + ScriptConfiguration Opt; }; -// Variable template is a C++14 feature, so we can't template -// a global variable. Use a struct to workaround. 
-template <class ELFT> struct Script { static LinkerScript<ELFT> *X; }; -template <class ELFT> LinkerScript<ELFT> *Script<ELFT>::X; - -extern LinkerScriptBase *ScriptBase; +extern LinkerScript *Script; } // end namespace elf } // end namespace lld diff --git a/ELF/MapFile.cpp b/ELF/MapFile.cpp new file mode 100644 index 000000000000..31c8091bb6a1 --- /dev/null +++ b/ELF/MapFile.cpp @@ -0,0 +1,131 @@ +//===- MapFile.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the -Map option. It shows lists in order and +// hierarchically the output sections, input sections, input files and +// symbol: +// +// Address Size Align Out In File Symbol +// ================================================================= +// 00201000 00000015 4 .text +// 00201000 0000000e 4 .text +// 00201000 0000000e 4 test.o +// 0020100e 00000000 0 local +// 00201005 00000000 0 f(int) +// +//===----------------------------------------------------------------------===// + +#include "MapFile.h" +#include "InputFiles.h" +#include "Strings.h" + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::object; + +using namespace lld; +using namespace lld::elf; + +static void writeOutSecLine(raw_fd_ostream &OS, int Width, uint64_t Address, + uint64_t Size, uint64_t Align, StringRef Name) { + OS << format("%0*llx %0*llx %5lld ", Width, Address, Width, Size, Align) + << left_justify(Name, 7); +} + +static void writeInSecLine(raw_fd_ostream &OS, int Width, uint64_t Address, + uint64_t Size, uint64_t Align, StringRef Name) { + // Pass an empty name to align the text to the correct column. 
+ writeOutSecLine(OS, Width, Address, Size, Align, ""); + OS << ' ' << left_justify(Name, 7); +} + +static void writeFileLine(raw_fd_ostream &OS, int Width, uint64_t Address, + uint64_t Size, uint64_t Align, StringRef Name) { + // Pass an empty name to align the text to the correct column. + writeInSecLine(OS, Width, Address, Size, Align, ""); + OS << ' ' << left_justify(Name, 7); +} + +static void writeSymbolLine(raw_fd_ostream &OS, int Width, uint64_t Address, + uint64_t Size, StringRef Name) { + // Pass an empty name to align the text to the correct column. + writeFileLine(OS, Width, Address, Size, 0, ""); + OS << ' ' << left_justify(Name, 7); +} + +template <class ELFT> +static void writeInputSection(raw_fd_ostream &OS, const InputSection *IS, + StringRef &PrevName) { + int Width = ELFT::Is64Bits ? 16 : 8; + StringRef Name = IS->Name; + if (Name != PrevName) { + writeInSecLine(OS, Width, IS->OutSec->Addr + IS->OutSecOff, IS->getSize(), + IS->Alignment, Name); + OS << '\n'; + PrevName = Name; + } + + elf::ObjectFile<ELFT> *File = IS->template getFile<ELFT>(); + if (!File) + return; + writeFileLine(OS, Width, IS->OutSec->Addr + IS->OutSecOff, IS->getSize(), + IS->Alignment, toString(File)); + OS << '\n'; + + for (SymbolBody *Sym : File->getSymbols()) { + auto *DR = dyn_cast<DefinedRegular>(Sym); + if (!DR) + continue; + if (DR->Section != IS) + continue; + if (DR->isSection()) + continue; + writeSymbolLine(OS, Width, Sym->getVA(), Sym->getSize<ELFT>(), + toString(*Sym)); + OS << '\n'; + } +} + +template <class ELFT> +static void writeMapFile2(raw_fd_ostream &OS, + ArrayRef<OutputSection *> OutputSections) { + int Width = ELFT::Is64Bits ? 
16 : 8; + + OS << left_justify("Address", Width) << ' ' << left_justify("Size", Width) + << " Align Out In File Symbol\n"; + + for (OutputSection *Sec : OutputSections) { + writeOutSecLine(OS, Width, Sec->Addr, Sec->Size, Sec->Alignment, Sec->Name); + OS << '\n'; + + StringRef PrevName = ""; + for (InputSection *IS : Sec->Sections) { + writeInputSection<ELFT>(OS, IS, PrevName); + } + } +} + +template <class ELFT> +void elf::writeMapFile(ArrayRef<OutputSection *> OutputSections) { + if (Config->MapFile.empty()) + return; + + std::error_code EC; + raw_fd_ostream OS(Config->MapFile, EC, sys::fs::F_None); + if (EC) + error("cannot open " + Config->MapFile + ": " + EC.message()); + else + writeMapFile2<ELFT>(OS, OutputSections); +} + +template void elf::writeMapFile<ELF32LE>(ArrayRef<OutputSection *>); +template void elf::writeMapFile<ELF32BE>(ArrayRef<OutputSection *>); +template void elf::writeMapFile<ELF64LE>(ArrayRef<OutputSection *>); +template void elf::writeMapFile<ELF64BE>(ArrayRef<OutputSection *>); diff --git a/ELF/MapFile.h b/ELF/MapFile.h new file mode 100644 index 000000000000..24d636890e53 --- /dev/null +++ b/ELF/MapFile.h @@ -0,0 +1,22 @@ +//===- MapFile.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_MAPFILE_H +#define LLD_ELF_MAPFILE_H + +#include "OutputSections.h" + +namespace lld { +namespace elf { +template <class ELFT> +void writeMapFile(llvm::ArrayRef<OutputSection *> OutputSections); +} +} + +#endif diff --git a/ELF/MarkLive.cpp b/ELF/MarkLive.cpp index 8d129fc3ff13..ee499265886e 100644 --- a/ELF/MarkLive.cpp +++ b/ELF/MarkLive.cpp @@ -22,6 +22,7 @@ #include "InputSection.h" #include "LinkerScript.h" +#include "Memory.h" #include "OutputSections.h" #include "Strings.h" #include "SymbolTable.h" @@ -44,51 +45,59 @@ using namespace lld::elf; namespace { // A resolved relocation. The Sec and Offset fields are set if the relocation // was resolved to an offset within a section. -template <class ELFT> struct ResolvedReloc { - InputSectionBase<ELFT> *Sec; - typename ELFT::uint Offset; +struct ResolvedReloc { + InputSectionBase *Sec; + uint64_t Offset; }; } // end anonymous namespace template <class ELFT> -static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, +static typename ELFT::uint getAddend(InputSectionBase &Sec, const typename ELFT::Rel &Rel) { return Target->getImplicitAddend(Sec.Data.begin() + Rel.r_offset, - Rel.getType(Config->Mips64EL)); + Rel.getType(Config->IsMips64EL)); } template <class ELFT> -static typename ELFT::uint getAddend(InputSectionBase<ELFT> &Sec, +static typename ELFT::uint getAddend(InputSectionBase &Sec, const typename ELFT::Rela &Rel) { return Rel.r_addend; } +// There are normally few input sections whose names are valid C +// identifiers, so we just store a std::vector instead of a multimap. 
+static DenseMap<StringRef, std::vector<InputSectionBase *>> CNamedSections; + template <class ELFT, class RelT> -static ResolvedReloc<ELFT> resolveReloc(InputSectionBase<ELFT> &Sec, - RelT &Rel) { - SymbolBody &B = Sec.getFile()->getRelocTargetSym(Rel); - auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); - if (!D || !D->Section) - return {nullptr, 0}; - typename ELFT::uint Offset = D->Value; - if (D->isSection()) - Offset += getAddend(Sec, Rel); - return {D->Section->Repl, Offset}; +static void resolveReloc(InputSectionBase &Sec, RelT &Rel, + std::function<void(ResolvedReloc)> Fn) { + SymbolBody &B = Sec.getFile<ELFT>()->getRelocTargetSym(Rel); + if (auto *D = dyn_cast<DefinedRegular>(&B)) { + if (!D->Section) + return; + typename ELFT::uint Offset = D->Value; + if (D->isSection()) + Offset += getAddend<ELFT>(Sec, Rel); + Fn({cast<InputSectionBase>(D->Section)->Repl, Offset}); + } else if (auto *U = dyn_cast<Undefined>(&B)) { + for (InputSectionBase *Sec : CNamedSections.lookup(U->getName())) + Fn({Sec, 0}); + } } // Calls Fn for each section that Sec refers to via relocations. template <class ELFT> -static void forEachSuccessor(InputSection<ELFT> &Sec, - std::function<void(ResolvedReloc<ELFT>)> Fn) { +static void forEachSuccessor(InputSection &Sec, + std::function<void(ResolvedReloc)> Fn) { if (Sec.AreRelocsRela) { - for (const typename ELFT::Rela &Rel : Sec.relas()) - Fn(resolveReloc(Sec, Rel)); + for (const typename ELFT::Rela &Rel : Sec.template relas<ELFT>()) + resolveReloc<ELFT>(Sec, Rel, Fn); } else { - for (const typename ELFT::Rel &Rel : Sec.rels()) - Fn(resolveReloc(Sec, Rel)); + for (const typename ELFT::Rel &Rel : Sec.template rels<ELFT>()) + resolveReloc<ELFT>(Sec, Rel, Fn); } - if (Sec.DependentSection) - Fn({Sec.DependentSection, 0}); + for (InputSectionBase *IS : Sec.DependentSections) + Fn({IS, 0}); } // The .eh_frame section is an unfortunate special case. @@ -106,9 +115,8 @@ static void forEachSuccessor(InputSection<ELFT> &Sec, // the gc pass. 
With that we would be able to also gc some sections holding // LSDAs and personality functions if we found that they were unused. template <class ELFT, class RelTy> -static void -scanEhFrameSection(EhInputSection<ELFT> &EH, ArrayRef<RelTy> Rels, - std::function<void(ResolvedReloc<ELFT>)> Enqueue) { +static void scanEhFrameSection(EhInputSection &EH, ArrayRef<RelTy> Rels, + std::function<void(ResolvedReloc)> Enqueue) { const endianness E = ELFT::TargetEndianness; for (unsigned I = 0, N = EH.Pieces.size(); I < N; ++I) { EhSectionPiece &Piece = EH.Pieces[I]; @@ -118,7 +126,7 @@ scanEhFrameSection(EhInputSection<ELFT> &EH, ArrayRef<RelTy> Rels, if (read32<E>(Piece.data().data() + 4) == 0) { // This is a CIE, we only need to worry about the first relocation. It is // known to point to the personality function. - Enqueue(resolveReloc(EH, Rels[FirstRelI])); + resolveReloc<ELFT>(EH, Rels[FirstRelI], Enqueue); continue; } // This is a FDE. The relocations point to the described function or to @@ -129,37 +137,37 @@ scanEhFrameSection(EhInputSection<ELFT> &EH, ArrayRef<RelTy> Rels, const RelTy &Rel = Rels[I2]; if (Rel.r_offset >= PieceEnd) break; - ResolvedReloc<ELFT> R = resolveReloc(EH, Rels[I2]); - if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) - continue; - if (R.Sec->Flags & SHF_EXECINSTR) - continue; - Enqueue({R.Sec, 0}); + resolveReloc<ELFT>(EH, Rels[I2], [&](ResolvedReloc R) { + if (!R.Sec || R.Sec == &InputSection::Discarded) + return; + if (R.Sec->Flags & SHF_EXECINSTR) + return; + Enqueue({R.Sec, 0}); + }); } } } template <class ELFT> -static void -scanEhFrameSection(EhInputSection<ELFT> &EH, - std::function<void(ResolvedReloc<ELFT>)> Enqueue) { +static void scanEhFrameSection(EhInputSection &EH, + std::function<void(ResolvedReloc)> Enqueue) { if (!EH.NumRelocations) return; // Unfortunately we need to split .eh_frame early since some relocations in // .eh_frame keep other section alive and some don't. 
- EH.split(); + EH.split<ELFT>(); if (EH.AreRelocsRela) - scanEhFrameSection(EH, EH.relas(), Enqueue); + scanEhFrameSection<ELFT>(EH, EH.template relas<ELFT>(), Enqueue); else - scanEhFrameSection(EH, EH.rels(), Enqueue); + scanEhFrameSection<ELFT>(EH, EH.template rels<ELFT>(), Enqueue); } // We do not garbage-collect two types of sections: // 1) Sections used by the loader (.init, .fini, .ctors, .dtors or .jcr) // 2) Non-allocatable sections which typically contain debugging information -template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) { +template <class ELFT> static bool isReserved(InputSectionBase *Sec) { switch (Sec->Type) { case SHT_FINI_ARRAY: case SHT_INIT_ARRAY: @@ -170,12 +178,7 @@ template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) { if (!(Sec->Flags & SHF_ALLOC)) return true; - // We do not want to reclaim sections if they can be referred - // by __start_* and __stop_* symbols. StringRef S = Sec->Name; - if (isValidCIdentifier(S)) - return true; - return S.startswith(".ctors") || S.startswith(".dtors") || S.startswith(".init") || S.startswith(".fini") || S.startswith(".jcr"); @@ -186,14 +189,15 @@ template <class ELFT> static bool isReserved(InputSectionBase<ELFT> *Sec) { // Starting from GC-root sections, this function visits all reachable // sections to set their "Live" bits. template <class ELFT> void elf::markLive() { - SmallVector<InputSection<ELFT> *, 256> Q; + SmallVector<InputSection *, 256> Q; + CNamedSections.clear(); - auto Enqueue = [&](ResolvedReloc<ELFT> R) { + auto Enqueue = [&](ResolvedReloc R) { // Skip over discarded sections. This in theory shouldn't happen, because // the ELF spec doesn't allow a relocation to point to a deduplicated // COMDAT section directly. Unfortunately this happens in practice (e.g. // .eh_frame) so we need to add a check. - if (!R.Sec || R.Sec == &InputSection<ELFT>::Discarded) + if (R.Sec == &InputSection::Discarded) return; // We don't gc non alloc sections. 
@@ -203,20 +207,20 @@ template <class ELFT> void elf::markLive() { // Usually, a whole section is marked as live or dead, but in mergeable // (splittable) sections, each piece of data has independent liveness bit. // So we explicitly tell it which offset is in use. - if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(R.Sec)) + if (auto *MS = dyn_cast<MergeInputSection>(R.Sec)) MS->markLiveAt(R.Offset); if (R.Sec->Live) return; R.Sec->Live = true; // Add input section to the queue. - if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(R.Sec)) + if (InputSection *S = dyn_cast<InputSection>(R.Sec)) Q.push_back(S); }; auto MarkSymbol = [&](const SymbolBody *Sym) { - if (auto *D = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym)) - Enqueue({D->Section, D->Value}); + if (auto *D = dyn_cast_or_null<DefinedRegular>(Sym)) + Enqueue({cast<InputSectionBase>(D->Section), D->Value}); }; // Add GC root symbols. @@ -234,14 +238,20 @@ template <class ELFT> void elf::markLive() { // Preserve special sections and those which are specified in linker // script KEEP command. - for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { + for (InputSectionBase *Sec : InputSections) { // .eh_frame is always marked as live now, but also it can reference to // sections that contain personality. We preserve all non-text sections // referred by .eh_frame here. - if (auto *EH = dyn_cast_or_null<EhInputSection<ELFT>>(Sec)) + if (auto *EH = dyn_cast_or_null<EhInputSection>(Sec)) scanEhFrameSection<ELFT>(*EH, Enqueue); - if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec)) + if (Sec->Flags & SHF_LINK_ORDER) + continue; + if (isReserved<ELFT>(Sec) || Script->shouldKeep(Sec)) Enqueue({Sec, 0}); + else if (isValidCIdentifier(Sec->Name)) { + CNamedSections[Saver.save("__start_" + Sec->Name)].push_back(Sec); + CNamedSections[Saver.save("__end_" + Sec->Name)].push_back(Sec); + } } // Mark all reachable sections. 
diff --git a/ELF/Options.td b/ELF/Options.td index 77ed4c7e466f..7ed8dfb090bd 100644 --- a/ELF/Options.td +++ b/ELF/Options.td @@ -48,6 +48,8 @@ def color_diagnostics_eq: J<"color-diagnostics=">, def define_common: F<"define-common">, HelpText<"Assign space to common symbols">; +def demangle: F<"demangle">, HelpText<"Demangle symbol names">; + def disable_new_dtags: F<"disable-new-dtags">, HelpText<"Disable new dynamic tags">; @@ -68,6 +70,8 @@ def dynamic_list: S<"dynamic-list">, def eh_frame_hdr: F<"eh-frame-hdr">, HelpText<"Request creation of .eh_frame_hdr section and PT_GNU_EH_FRAME segment header">; +def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">; + def enable_new_dtags: F<"enable-new-dtags">, HelpText<"Enable new dynamic tags">; @@ -80,6 +84,9 @@ def entry: S<"entry">, MetaVarName<"<entry>">, def error_limit: S<"error-limit">, HelpText<"Maximum number of errors to emit before stopping (0 = no limit)">; +def error_unresolved_symbols: F<"error-unresolved-symbols">, + HelpText<"Report unresolved symbols as errors">; + def export_dynamic: F<"export-dynamic">, HelpText<"Put symbols in the dynamic symbol table">; @@ -124,6 +131,8 @@ def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">, def m: JoinedOrSeparate<["-"], "m">, HelpText<"Set target emulation">; +def Map: JS<"Map">, HelpText<"Print a link map to the specified file">; + def nostdlib: F<"nostdlib">, HelpText<"Only search directories specified on the command line">; @@ -139,6 +148,12 @@ def no_define_common: F<"no-define-common">, def no_demangle: F<"no-demangle">, HelpText<"Do not demangle symbol names">; +def no_dynamic_linker: F<"no-dynamic-linker">, + HelpText<"Inhibit output of .interp section">; + +def no_export_dynamic: F<"no-export-dynamic">; +def no_fatal_warnings: F<"no-fatal-warnings">; + def no_gc_sections: F<"no-gc-sections">, HelpText<"Disable garbage collection of unused sections">; @@ -170,7 +185,7 @@ def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, def 
oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">, HelpText<"Specify the binary format for the output object file">; -def omagic: F<"omagic">, MetaVarName<"<magic>">, +def omagic: Flag<["--"], "omagic">, MetaVarName<"<magic>">, HelpText<"Set the text and data sections to be readable and writable">; def pie: F<"pie">, HelpText<"Create a position independent executable">; @@ -178,6 +193,9 @@ def pie: F<"pie">, HelpText<"Create a position independent executable">; def print_gc_sections: F<"print-gc-sections">, HelpText<"List removed unused sections">; +def print_map: F<"print-map">, + HelpText<"Print a link map to the standard output">; + def reproduce: S<"reproduce">, HelpText<"Dump linker invocation and input files for debugging">; @@ -221,7 +239,7 @@ def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; def trace: F<"trace">, HelpText<"Print the names of the input files">; -def trace_symbol : J<"trace-symbol=">, HelpText<"Trace references to symbols">; +def trace_symbol : S<"trace-symbol">, HelpText<"Trace references to symbols">; def undefined: S<"undefined">, HelpText<"Force undefined symbol during linking">; @@ -244,6 +262,9 @@ def version_script: S<"version-script">, def warn_common: F<"warn-common">, HelpText<"Warn about duplicate common symbols">; +def warn_unresolved_symbols: F<"warn-unresolved-symbols">, + HelpText<"Report unresolved symbols as warnings">; + def whole_archive: F<"whole-archive">, HelpText<"Force load of all members in a static library">; @@ -267,6 +288,7 @@ def alias_define_common_dp: F<"dp">, Alias<define_common>; def alias_discard_all_x: Flag<["-"], "x">, Alias<discard_all>; def alias_discard_locals_X: Flag<["-"], "X">, Alias<discard_locals>; def alias_dynamic_list: J<"dynamic-list=">, Alias<dynamic_list>; +def alias_emit_relocs: Flag<["-"], "q">, Alias<emit_relocs>; def alias_entry_e: JoinedOrSeparate<["-"], "e">, Alias<entry>; def alias_entry_entry: J<"entry=">, Alias<entry>; def alias_error_limit: 
J<"error-limit=">, Alias<error_limit>; @@ -278,10 +300,12 @@ def alias_format_b: S<"b">, Alias<format>; def alias_hash_style_hash_style: J<"hash-style=">, Alias<hash_style>; def alias_init_init: J<"init=">, Alias<init>; def alias_l__library: J<"library=">, Alias<l>; +def alias_Map_eq: J<"Map=">, Alias<Map>; def alias_omagic: Flag<["-"], "N">, Alias<omagic>; def alias_o_output: Joined<["--"], "output=">, Alias<o>; def alias_o_output2 : Separate<["--"], "output">, Alias<o>; def alias_pie_pic_executable: F<"pic-executable">, Alias<pie>; +def alias_print_map_M: Flag<["-"], "M">, Alias<print_map>; def alias_relocatable_r: Flag<["-"], "r">, Alias<relocatable>; def alias_retain_symbols_file: S<"retain-symbols-file">, Alias<retain_symbols_file>; def alias_rpath_R: JoinedOrSeparate<["-"], "R">, Alias<rpath>; @@ -297,6 +321,7 @@ def alias_strip_debug_S: Flag<["-"], "S">, Alias<strip_debug>; def alias_Tbss: J<"Tbss=">, Alias<Tbss>; def alias_Tdata: J<"Tdata=">, Alias<Tdata>; def alias_trace: Flag<["-"], "t">, Alias<trace>; +def trace_trace_symbol_eq : J<"trace-symbol=">, Alias<trace_symbol>; def alias_trace_symbol_y : JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>; def alias_Ttext: J<"Ttext=">, Alias<Ttext>; def alias_Ttext_segment: S<"Ttext-segment">, Alias<Ttext>; @@ -329,17 +354,12 @@ def plugin_opt_eq: J<"plugin-opt=">; // Options listed below are silently ignored for now for compatibility. 
def allow_shlib_undefined: F<"allow-shlib-undefined">; def cref: Flag<["--"], "cref">; -def demangle: F<"demangle">; def detect_odr_violations: F<"detect-odr-violations">; def g: Flag<["-"], "g">; -def M: Flag<["-"], "M">; -def Map: JS<"Map">; def no_add_needed: F<"no-add-needed">; def no_allow_shlib_undefined: F<"no-allow-shlib-undefined">; def no_copy_dt_needed_entries: F<"no-copy-dt-needed-entries">, Alias<no_add_needed>; -def no_dynamic_linker: F<"no-dynamic-linker">; -def no_fatal_warnings: F<"no-fatal-warnings">; def no_mmap_output_file: F<"no-mmap-output-file">; def no_warn_common: F<"no-warn-common">; def no_warn_mismatch: F<"no-warn-mismatch">; @@ -355,7 +375,6 @@ def G: JoinedOrSeparate<["-"], "G">; def Qy : F<"Qy">; // Aliases for ignored options -def alias_Map_eq: J<"Map=">, Alias<Map>; def alias_version_script_version_script: J<"version-script=">, Alias<version_script>; @@ -368,5 +387,13 @@ def lto_partitions: J<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; def disable_verify: F<"disable-verify">; def mllvm: S<"mllvm">; +def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">, + HelpText<"YAML output file for optimization remarks">; +def opt_remarks_with_hotness: Flag<["--"], "opt-remarks-with-hotness">, + HelpText<"Include hotness informations in the optimization remarks file">; def save_temps: F<"save-temps">; +def thinlto_cache_dir: J<"thinlto-cache-dir=">, + HelpText<"Path to ThinLTO cached object file directory">; +def thinlto_cache_policy: S<"thinlto-cache-policy">, + HelpText<"Pruning policy for the ThinLTO cache">; def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; diff --git a/ELF/OutputSections.cpp b/ELF/OutputSections.cpp index 7c708ce4ed67..93f83100a745 100644 --- a/ELF/OutputSections.cpp +++ b/ELF/OutputSections.cpp @@ -9,7 +9,6 @@ #include "OutputSections.h" #include "Config.h" -#include "EhFrame.h" #include "LinkerScript.h" #include "Memory.h" #include "Strings.h" @@ -31,15 +30,18 @@ 
using namespace llvm::ELF; using namespace lld; using namespace lld::elf; -OutputSectionBase::OutputSectionBase(StringRef Name, uint32_t Type, - uint64_t Flags) - : Name(Name) { - this->Type = Type; - this->Flags = Flags; - this->Addralign = 1; -} - -uint32_t OutputSectionBase::getPhdrFlags() const { +uint8_t Out::First; +OutputSection *Out::Opd; +uint8_t *Out::OpdBuf; +PhdrEntry *Out::TlsPhdr; +OutputSection *Out::DebugInfo; +OutputSection *Out::ElfHeader; +OutputSection *Out::ProgramHeaders; +OutputSection *Out::PreinitArray; +OutputSection *Out::InitArray; +OutputSection *Out::FiniArray; + +uint32_t OutputSection::getPhdrFlags() const { uint32_t Ret = PF_R; if (Flags & SHF_WRITE) Ret |= PF_W; @@ -49,9 +51,9 @@ uint32_t OutputSectionBase::getPhdrFlags() const { } template <class ELFT> -void OutputSectionBase::writeHeaderTo(typename ELFT::Shdr *Shdr) { +void OutputSection::writeHeaderTo(typename ELFT::Shdr *Shdr) { Shdr->sh_entsize = Entsize; - Shdr->sh_addralign = Addralign; + Shdr->sh_addralign = Alignment; Shdr->sh_type = Type; Shdr->sh_offset = Offset; Shdr->sh_flags = Flags; @@ -62,49 +64,28 @@ void OutputSectionBase::writeHeaderTo(typename ELFT::Shdr *Shdr) { Shdr->sh_name = ShName; } -template <class ELFT> static uint64_t getEntsize(uint32_t Type) { - switch (Type) { - case SHT_RELA: - return sizeof(typename ELFT::Rela); - case SHT_REL: - return sizeof(typename ELFT::Rel); - case SHT_MIPS_REGINFO: - return sizeof(Elf_Mips_RegInfo<ELFT>); - case SHT_MIPS_OPTIONS: - return sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>); - case SHT_MIPS_ABIFLAGS: - return sizeof(Elf_Mips_ABIFlags<ELFT>); - default: - return 0; - } -} - -template <class ELFT> -OutputSection<ELFT>::OutputSection(StringRef Name, uint32_t Type, uintX_t Flags) - : OutputSectionBase(Name, Type, Flags) { - this->Entsize = getEntsize<ELFT>(Type); -} +OutputSection::OutputSection(StringRef Name, uint32_t Type, uint64_t Flags) + : SectionBase(Output, Name, Flags, /*Entsize*/ 0, 
/*Alignment*/ 1, Type, + /*Info*/ 0, + /*Link*/ 0) {} -template <typename ELFT> -static bool compareByFilePosition(InputSection<ELFT> *A, - InputSection<ELFT> *B) { +static bool compareByFilePosition(InputSection *A, InputSection *B) { // Synthetic doesn't have link order dependecy, stable_sort will keep it last - if (A->kind() == InputSectionData::Synthetic || - B->kind() == InputSectionData::Synthetic) + if (A->kind() == InputSectionBase::Synthetic || + B->kind() == InputSectionBase::Synthetic) return false; - auto *LA = cast<InputSection<ELFT>>(A->getLinkOrderDep()); - auto *LB = cast<InputSection<ELFT>>(B->getLinkOrderDep()); - OutputSectionBase *AOut = LA->OutSec; - OutputSectionBase *BOut = LB->OutSec; + auto *LA = cast<InputSection>(A->getLinkOrderDep()); + auto *LB = cast<InputSection>(B->getLinkOrderDep()); + OutputSection *AOut = LA->OutSec; + OutputSection *BOut = LB->OutSec; if (AOut != BOut) return AOut->SectionIndex < BOut->SectionIndex; return LA->OutSecOff < LB->OutSecOff; } -template <class ELFT> void OutputSection<ELFT>::finalize() { +template <class ELFT> void OutputSection::finalize() { if ((this->Flags & SHF_LINK_ORDER) && !this->Sections.empty()) { - std::sort(Sections.begin(), Sections.end(), compareByFilePosition<ELFT>); - Size = 0; + std::sort(Sections.begin(), Sections.end(), compareByFilePosition); assignOffsets(); // We must preserve the link order dependency of sections with the @@ -116,34 +97,41 @@ template <class ELFT> void OutputSection<ELFT>::finalize() { } uint32_t Type = this->Type; - if (!Config->Relocatable || (Type != SHT_RELA && Type != SHT_REL)) + if (!Config->CopyRelocs || (Type != SHT_RELA && Type != SHT_REL)) + return; + + InputSection *First = Sections[0]; + if (isa<SyntheticSection>(First)) return; this->Link = In<ELFT>::SymTab->OutSec->SectionIndex; // sh_info for SHT_REL[A] sections should contain the section header index of // the section to which the relocation applies. 
- InputSectionBase<ELFT> *S = Sections[0]->getRelocatedSection(); + InputSectionBase *S = First->getRelocatedSection(); this->Info = S->OutSec->SectionIndex; } -template <class ELFT> -void OutputSection<ELFT>::addSection(InputSectionData *C) { - assert(C->Live); - auto *S = cast<InputSection<ELFT>>(C); +void OutputSection::addSection(InputSection *S) { + assert(S->Live); Sections.push_back(S); S->OutSec = this; this->updateAlignment(S->Alignment); - // Keep sh_entsize value of the input section to be able to perform merging - // later during a final linking using the generated relocatable object. - if (Config->Relocatable && (S->Flags & SHF_MERGE)) - this->Entsize = S->Entsize; + + // If this section contains a table of fixed-size entries, sh_entsize + // holds the element size. Consequently, if this contains two or more + // input sections, all of them must have the same sh_entsize. However, + // you can put different types of input sections into one output + // sectin by using linker scripts. I don't know what to do here. + // Probably we sholuld handle that as an error. But for now we just + // pick the largest sh_entsize. + this->Entsize = std::max(this->Entsize, S->Entsize); } // This function is called after we sort input sections // and scan relocations to setup sections' offsets. 
-template <class ELFT> void OutputSection<ELFT>::assignOffsets() { - uintX_t Off = this->Size; - for (InputSection<ELFT> *S : Sections) { +void OutputSection::assignOffsets() { + uint64_t Off = 0; + for (InputSection *S : Sections) { Off = alignTo(Off, S->Alignment); S->OutSecOff = Off; Off += S->getSize(); @@ -151,14 +139,12 @@ template <class ELFT> void OutputSection<ELFT>::assignOffsets() { this->Size = Off; } -template <class ELFT> -void OutputSection<ELFT>::sort( - std::function<int(InputSection<ELFT> *S)> Order) { - typedef std::pair<unsigned, InputSection<ELFT> *> Pair; +void OutputSection::sort(std::function<int(InputSectionBase *S)> Order) { + typedef std::pair<unsigned, InputSection *> Pair; auto Comp = [](const Pair &A, const Pair &B) { return A.first < B.first; }; std::vector<Pair> V; - for (InputSection<ELFT> *S : Sections) + for (InputSection *S : Sections) V.push_back({Order(S), S}); std::stable_sort(V.begin(), V.end(), Comp); Sections.clear(); @@ -172,9 +158,9 @@ void OutputSection<ELFT>::sort( // because the compiler keeps the original initialization order in a // translation unit and we need to respect that. // For more detail, read the section of the GCC's manual about init_priority. -template <class ELFT> void OutputSection<ELFT>::sortInitFini() { +void OutputSection::sortInitFini() { // Sort sections by priority. - sort([](InputSection<ELFT> *S) { return getPriority(S->Name); }); + sort([](InputSectionBase *S) { return getPriority(S->Name); }); } // Returns true if S matches /Filename.?\.o$/. @@ -208,15 +194,13 @@ static bool isCrtend(StringRef S) { return isCrtBeginEnd(S, "crtend"); } // .ctors are duplicate features (and .init_array is newer.) However, there // are too many real-world use cases of .ctors, so we had no choice to // support that with this rather ad-hoc semantics. 
-template <class ELFT> -static bool compCtors(const InputSection<ELFT> *A, - const InputSection<ELFT> *B) { - bool BeginA = isCrtbegin(A->getFile()->getName()); - bool BeginB = isCrtbegin(B->getFile()->getName()); +static bool compCtors(const InputSection *A, const InputSection *B) { + bool BeginA = isCrtbegin(A->File->getName()); + bool BeginB = isCrtbegin(B->File->getName()); if (BeginA != BeginB) return BeginA; - bool EndA = isCrtend(A->getFile()->getName()); - bool EndB = isCrtend(B->getFile()->getName()); + bool EndA = isCrtend(A->File->getName()); + bool EndB = isCrtend(B->File->getName()); if (EndA != EndB) return EndB; StringRef X = A->Name; @@ -233,319 +217,65 @@ static bool compCtors(const InputSection<ELFT> *A, // Sorts input sections by the special rules for .ctors and .dtors. // Unfortunately, the rules are different from the one for .{init,fini}_array. // Read the comment above. -template <class ELFT> void OutputSection<ELFT>::sortCtorsDtors() { - std::stable_sort(Sections.begin(), Sections.end(), compCtors<ELFT>); +void OutputSection::sortCtorsDtors() { + std::stable_sort(Sections.begin(), Sections.end(), compCtors); } -// Fill [Buf, Buf + Size) with Filler. Filler is written in big -// endian order. This is used for linker script "=fillexp" command. -void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { - uint8_t V[4]; - write32be(V, Filler); +// Fill [Buf, Buf + Size) with Filler. +// This is used for linker script "=fillexp" command. 
+static void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { size_t I = 0; for (; I + 4 < Size; I += 4) - memcpy(Buf + I, V, 4); - memcpy(Buf + I, V, Size - I); -} - -template <class ELFT> void OutputSection<ELFT>::writeTo(uint8_t *Buf) { - Loc = Buf; - if (uint32_t Filler = Script<ELFT>::X->getFiller(this->Name)) - fill(Buf, this->Size, Filler); - - auto Fn = [=](InputSection<ELFT> *IS) { IS->writeTo(Buf); }; - forEach(Sections.begin(), Sections.end(), Fn); - - // Linker scripts may have BYTE()-family commands with which you - // can write arbitrary bytes to the output. Process them if any. - Script<ELFT>::X->writeDataBytes(this->Name, Buf); -} - -template <class ELFT> -EhOutputSection<ELFT>::EhOutputSection() - : OutputSectionBase(".eh_frame", SHT_PROGBITS, SHF_ALLOC) {} - -// Search for an existing CIE record or create a new one. -// CIE records from input object files are uniquified by their contents -// and where their relocations point to. -template <class ELFT> -template <class RelTy> -CieRecord *EhOutputSection<ELFT>::addCie(EhSectionPiece &Piece, - ArrayRef<RelTy> Rels) { - auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); - const endianness E = ELFT::TargetEndianness; - if (read32<E>(Piece.data().data() + 4) != 0) - fatal(toString(Sec) + ": CIE expected at beginning of .eh_frame"); - - SymbolBody *Personality = nullptr; - unsigned FirstRelI = Piece.FirstRelocation; - if (FirstRelI != (unsigned)-1) - Personality = &Sec->getFile()->getRelocTargetSym(Rels[FirstRelI]); - - // Search for an existing CIE by CIE contents/relocation target pair. - CieRecord *Cie = &CieMap[{Piece.data(), Personality}]; - - // If not found, create a new one. - if (Cie->Piece == nullptr) { - Cie->Piece = &Piece; - Cies.push_back(Cie); - } - return Cie; -} - -// There is one FDE per function. Returns true if a given FDE -// points to a live function. 
-template <class ELFT> -template <class RelTy> -bool EhOutputSection<ELFT>::isFdeLive(EhSectionPiece &Piece, - ArrayRef<RelTy> Rels) { - auto *Sec = cast<EhInputSection<ELFT>>(Piece.ID); - unsigned FirstRelI = Piece.FirstRelocation; - if (FirstRelI == (unsigned)-1) - fatal(toString(Sec) + ": FDE doesn't reference another section"); - const RelTy &Rel = Rels[FirstRelI]; - SymbolBody &B = Sec->getFile()->getRelocTargetSym(Rel); - auto *D = dyn_cast<DefinedRegular<ELFT>>(&B); - if (!D || !D->Section) - return false; - InputSectionBase<ELFT> *Target = D->Section->Repl; - return Target && Target->Live; -} - -// .eh_frame is a sequence of CIE or FDE records. In general, there -// is one CIE record per input object file which is followed by -// a list of FDEs. This function searches an existing CIE or create a new -// one and associates FDEs to the CIE. -template <class ELFT> -template <class RelTy> -void EhOutputSection<ELFT>::addSectionAux(EhInputSection<ELFT> *Sec, - ArrayRef<RelTy> Rels) { - const endianness E = ELFT::TargetEndianness; - - DenseMap<size_t, CieRecord *> OffsetToCie; - for (EhSectionPiece &Piece : Sec->Pieces) { - // The empty record is the end marker. - if (Piece.size() == 4) - return; - - size_t Offset = Piece.InputOff; - uint32_t ID = read32<E>(Piece.data().data() + 4); - if (ID == 0) { - OffsetToCie[Offset] = addCie(Piece, Rels); - continue; - } - - uint32_t CieOffset = Offset + 4 - ID; - CieRecord *Cie = OffsetToCie[CieOffset]; - if (!Cie) - fatal(toString(Sec) + ": invalid CIE reference"); - - if (!isFdeLive(Piece, Rels)) - continue; - Cie->FdePieces.push_back(&Piece); - NumFdes++; - } -} - -template <class ELFT> -void EhOutputSection<ELFT>::addSection(InputSectionData *C) { - auto *Sec = cast<EhInputSection<ELFT>>(C); - Sec->OutSec = this; - this->updateAlignment(Sec->Alignment); - Sections.push_back(Sec); - - // .eh_frame is a sequence of CIE or FDE records. 
This function - // splits it into pieces so that we can call - // SplitInputSection::getSectionPiece on the section. - Sec->split(); - if (Sec->Pieces.empty()) - return; - - if (Sec->NumRelocations) { - if (Sec->AreRelocsRela) - addSectionAux(Sec, Sec->relas()); - else - addSectionAux(Sec, Sec->rels()); - return; - } - addSectionAux(Sec, makeArrayRef<Elf_Rela>(nullptr, nullptr)); -} - -template <class ELFT> -static void writeCieFde(uint8_t *Buf, ArrayRef<uint8_t> D) { - memcpy(Buf, D.data(), D.size()); - - // Fix the size field. -4 since size does not include the size field itself. - const endianness E = ELFT::TargetEndianness; - write32<E>(Buf, alignTo(D.size(), sizeof(typename ELFT::uint)) - 4); -} - -template <class ELFT> void EhOutputSection<ELFT>::finalize() { - if (this->Size) - return; // Already finalized. - - size_t Off = 0; - for (CieRecord *Cie : Cies) { - Cie->Piece->OutputOff = Off; - Off += alignTo(Cie->Piece->size(), sizeof(uintX_t)); - - for (EhSectionPiece *Fde : Cie->FdePieces) { - Fde->OutputOff = Off; - Off += alignTo(Fde->size(), sizeof(uintX_t)); - } - } - this->Size = Off; -} - -template <class ELFT> static uint64_t readFdeAddr(uint8_t *Buf, int Size) { - const endianness E = ELFT::TargetEndianness; - switch (Size) { - case DW_EH_PE_udata2: - return read16<E>(Buf); - case DW_EH_PE_udata4: - return read32<E>(Buf); - case DW_EH_PE_udata8: - return read64<E>(Buf); - case DW_EH_PE_absptr: - if (ELFT::Is64Bits) - return read64<E>(Buf); - return read32<E>(Buf); - } - fatal("unknown FDE size encoding"); + memcpy(Buf + I, &Filler, 4); + memcpy(Buf + I, &Filler, Size - I); } -// Returns the VA to which a given FDE (on a mmap'ed buffer) is applied to. -// We need it to create .eh_frame_hdr section. -template <class ELFT> -typename ELFT::uint EhOutputSection<ELFT>::getFdePc(uint8_t *Buf, size_t FdeOff, - uint8_t Enc) { - // The starting address to which this FDE applies is - // stored at FDE + 8 byte. 
- size_t Off = FdeOff + 8; - uint64_t Addr = readFdeAddr<ELFT>(Buf + Off, Enc & 0x7); - if ((Enc & 0x70) == DW_EH_PE_absptr) - return Addr; - if ((Enc & 0x70) == DW_EH_PE_pcrel) - return Addr + this->Addr + Off; - fatal("unknown FDE size relative encoding"); +uint32_t OutputSection::getFiller() { + // Determine what to fill gaps between InputSections with, as specified by the + // linker script. If nothing is specified and this is an executable section, + // fall back to trap instructions to prevent bad diassembly and detect invalid + // jumps to padding. + if (Optional<uint32_t> Filler = Script->getFiller(Name)) + return *Filler; + if (Flags & SHF_EXECINSTR) + return Target->TrapInstr; + return 0; } -template <class ELFT> void EhOutputSection<ELFT>::writeTo(uint8_t *Buf) { - const endianness E = ELFT::TargetEndianness; - for (CieRecord *Cie : Cies) { - size_t CieOffset = Cie->Piece->OutputOff; - writeCieFde<ELFT>(Buf + CieOffset, Cie->Piece->data()); - - for (EhSectionPiece *Fde : Cie->FdePieces) { - size_t Off = Fde->OutputOff; - writeCieFde<ELFT>(Buf + Off, Fde->data()); - - // FDE's second word should have the offset to an associated CIE. - // Write it. - write32<E>(Buf + Off + 4, Off + 4 - CieOffset); - } - } +template <class ELFT> void OutputSection::writeTo(uint8_t *Buf) { + Loc = Buf; - for (EhInputSection<ELFT> *S : Sections) - S->relocate(Buf, nullptr); - - // Construct .eh_frame_hdr. .eh_frame_hdr is a binary search table - // to get a FDE from an address to which FDE is applied. So here - // we obtain two addresses and pass them to EhFrameHdr object. - if (In<ELFT>::EhFrameHdr) { - for (CieRecord *Cie : Cies) { - uint8_t Enc = getFdeEncoding<ELFT>(Cie->Piece); - for (SectionPiece *Fde : Cie->FdePieces) { - uintX_t Pc = getFdePc(Buf, Fde->OutputOff, Enc); - uintX_t FdeVA = this->Addr + Fde->OutputOff; - In<ELFT>::EhFrameHdr->addFde(Pc, FdeVA); - } + // Write leading padding. + uint32_t Filler = getFiller(); + if (Filler) + fill(Buf, Sections.empty() ? 
Size : Sections[0]->OutSecOff, Filler); + + parallelFor(0, Sections.size(), [=](size_t I) { + InputSection *Sec = Sections[I]; + Sec->writeTo<ELFT>(Buf); + + // Fill gaps between sections. + if (Filler) { + uint8_t *Start = Buf + Sec->OutSecOff + Sec->getSize(); + uint8_t *End; + if (I + 1 == Sections.size()) + End = Buf + Size; + else + End = Buf + Sections[I + 1]->OutSecOff; + fill(Start, End - Start, Filler); } - } -} - -template <class ELFT> -MergeOutputSection<ELFT>::MergeOutputSection(StringRef Name, uint32_t Type, - uintX_t Flags, uintX_t Alignment) - : OutputSectionBase(Name, Type, Flags), - Builder(StringTableBuilder::RAW, Alignment) {} - -template <class ELFT> void MergeOutputSection<ELFT>::writeTo(uint8_t *Buf) { - Builder.write(Buf); -} - -template <class ELFT> -void MergeOutputSection<ELFT>::addSection(InputSectionData *C) { - auto *Sec = cast<MergeInputSection<ELFT>>(C); - Sec->OutSec = this; - this->updateAlignment(Sec->Alignment); - this->Entsize = Sec->Entsize; - Sections.push_back(Sec); -} - -template <class ELFT> bool MergeOutputSection<ELFT>::shouldTailMerge() const { - return (this->Flags & SHF_STRINGS) && Config->Optimize >= 2; -} - -template <class ELFT> void MergeOutputSection<ELFT>::finalizeTailMerge() { - // Add all string pieces to the string table builder to create section - // contents. - for (MergeInputSection<ELFT> *Sec : Sections) - for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) - if (Sec->Pieces[I].Live) - Builder.add(Sec->getData(I)); - - // Fix the string table content. After this, the contents will never change. - Builder.finalize(); - this->Size = Builder.getSize(); - - // finalize() fixed tail-optimized strings, so we can now get - // offsets of strings. Get an offset for each string and save it - // to a corresponding StringPiece for easy access. 
- for (MergeInputSection<ELFT> *Sec : Sections) - for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) - if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); -} + }); -template <class ELFT> void MergeOutputSection<ELFT>::finalizeNoTailMerge() { - // Add all string pieces to the string table builder to create section - // contents. Because we are not tail-optimizing, offsets of strings are - // fixed when they are added to the builder (string table builder contains - // a hash table from strings to offsets). - for (MergeInputSection<ELFT> *Sec : Sections) - for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) - if (Sec->Pieces[I].Live) - Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I)); - - Builder.finalizeInOrder(); - this->Size = Builder.getSize(); -} - -template <class ELFT> void MergeOutputSection<ELFT>::finalize() { - if (shouldTailMerge()) - finalizeTailMerge(); - else - finalizeNoTailMerge(); + // Linker scripts may have BYTE()-family commands with which you + // can write arbitrary bytes to the output. Process them if any. 
+ Script->writeDataBytes(Name, Buf); } -template <class ELFT> -static typename ELFT::uint getOutFlags(InputSectionBase<ELFT> *S) { +static uint64_t getOutFlags(InputSectionBase *S) { return S->Flags & ~SHF_GROUP & ~SHF_COMPRESSED; } -namespace llvm { -template <> struct DenseMapInfo<lld::elf::SectionKey> { - static lld::elf::SectionKey getEmptyKey(); - static lld::elf::SectionKey getTombstoneKey(); - static unsigned getHashValue(const lld::elf::SectionKey &Val); - static bool isEqual(const lld::elf::SectionKey &LHS, - const lld::elf::SectionKey &RHS); -}; -} - -template <class ELFT> -static SectionKey createKey(InputSectionBase<ELFT> *C, StringRef OutsecName) { +static SectionKey createKey(InputSectionBase *C, StringRef OutsecName) { // The ELF spec just says // ---------------------------------------------------------------- // In the first phase, input sections that match in name, type and @@ -588,81 +318,76 @@ static SectionKey createKey(InputSectionBase<ELFT> *C, StringRef OutsecName) { // // Given the above issues, we instead merge sections by name and error on // incompatible types and flags. - // - // The exception being SHF_MERGE, where we create different output sections - // for each alignment. This makes each output section simple. In case of - // relocatable object generation we do not try to perform merging and treat - // SHF_MERGE sections as regular ones, but also create different output - // sections for them to allow merging at final linking stage. - // - // Fortunately, creating symbols in the middle of a merge section is not - // supported by bfd or gold, so the SHF_MERGE exception should not cause - // problems with most linker scripts. 
- - typedef typename ELFT::uint uintX_t; - uintX_t Flags = C->Flags & (SHF_MERGE | SHF_STRINGS); - uintX_t Alignment = 0; - if (isa<MergeInputSection<ELFT>>(C) || - (Config->Relocatable && (C->Flags & SHF_MERGE))) - Alignment = std::max<uintX_t>(C->Alignment, C->Entsize); + uint32_t Alignment = 0; + uint64_t Flags = 0; + if (Config->Relocatable && (C->Flags & SHF_MERGE)) { + Alignment = std::max<uint64_t>(C->Alignment, C->Entsize); + Flags = C->Flags & (SHF_MERGE | SHF_STRINGS); + } return SectionKey{OutsecName, Flags, Alignment}; } -template <class ELFT> OutputSectionFactory<ELFT>::OutputSectionFactory() {} +OutputSectionFactory::OutputSectionFactory( + std::vector<OutputSection *> &OutputSections) + : OutputSections(OutputSections) {} -template <class ELFT> OutputSectionFactory<ELFT>::~OutputSectionFactory() {} +static uint64_t getIncompatibleFlags(uint64_t Flags) { + return Flags & (SHF_ALLOC | SHF_TLS); +} -template <class ELFT> -std::pair<OutputSectionBase *, bool> -OutputSectionFactory<ELFT>::create(InputSectionBase<ELFT> *C, - StringRef OutsecName) { - SectionKey Key = createKey(C, OutsecName); - return create(Key, C); +// We allow sections of types listed below to merged into a +// single progbits section. This is typically done by linker +// scripts. Merging nobits and progbits will force disk space +// to be allocated for nobits sections. Other ones don't require +// any special treatment on top of progbits, so there doesn't +// seem to be a harm in merging them. 
+static bool canMergeToProgbits(unsigned Type) { + return Type == SHT_NOBITS || Type == SHT_PROGBITS || Type == SHT_INIT_ARRAY || + Type == SHT_PREINIT_ARRAY || Type == SHT_FINI_ARRAY || + Type == SHT_NOTE; } -static uint64_t getIncompatibleFlags(uint64_t Flags) { - return Flags & (SHF_ALLOC | SHF_TLS); +static void reportDiscarded(InputSectionBase *IS) { + if (!Config->PrintGcSections) + return; + message("removing unused section from '" + IS->Name + "' in file '" + + IS->File->getName()); } -template <class ELFT> -std::pair<OutputSectionBase *, bool> -OutputSectionFactory<ELFT>::create(const SectionKey &Key, - InputSectionBase<ELFT> *C) { - uintX_t Flags = getOutFlags(C); - OutputSectionBase *&Sec = Map[Key]; +void OutputSectionFactory::addInputSec(InputSectionBase *IS, + StringRef OutsecName) { + if (!IS->Live) { + reportDiscarded(IS); + return; + } + + SectionKey Key = createKey(IS, OutsecName); + uint64_t Flags = getOutFlags(IS); + OutputSection *&Sec = Map[Key]; if (Sec) { - if (getIncompatibleFlags(Sec->Flags) != getIncompatibleFlags(C->Flags)) + if (getIncompatibleFlags(Sec->Flags) != getIncompatibleFlags(IS->Flags)) error("Section has flags incompatible with others with the same name " + - toString(C)); - // Convert notbits to progbits if they are mixed. This happens is some - // linker scripts. 
- if (Sec->Type == SHT_NOBITS && C->Type == SHT_PROGBITS) - Sec->Type = SHT_PROGBITS; - if (Sec->Type != C->Type && - !(Sec->Type == SHT_PROGBITS && C->Type == SHT_NOBITS)) - error("Section has different type from others with the same name " + - toString(C)); + toString(IS)); + if (Sec->Type != IS->Type) { + if (canMergeToProgbits(Sec->Type) && canMergeToProgbits(IS->Type)) + Sec->Type = SHT_PROGBITS; + else + error("Section has different type from others with the same name " + + toString(IS)); + } Sec->Flags |= Flags; - return {Sec, false}; + } else { + Sec = make<OutputSection>(Key.Name, IS->Type, Flags); + OutputSections.push_back(Sec); } - uint32_t Type = C->Type; - switch (C->kind()) { - case InputSectionBase<ELFT>::Regular: - case InputSectionBase<ELFT>::Synthetic: - Sec = make<OutputSection<ELFT>>(Key.Name, Type, Flags); - break; - case InputSectionBase<ELFT>::EHFrame: - return {Out<ELFT>::EhFrame, false}; - case InputSectionBase<ELFT>::Merge: - Sec = make<MergeOutputSection<ELFT>>(Key.Name, Type, Flags, Key.Alignment); - break; - } - return {Sec, true}; + Sec->addSection(cast<InputSection>(IS)); } +OutputSectionFactory::~OutputSectionFactory() {} + SectionKey DenseMapInfo<SectionKey>::getEmptyKey() { return SectionKey{DenseMapInfo<StringRef>::getEmptyKey(), 0, 0}; } @@ -681,32 +406,23 @@ bool DenseMapInfo<SectionKey>::isEqual(const SectionKey &LHS, LHS.Flags == RHS.Flags && LHS.Alignment == RHS.Alignment; } -namespace lld { -namespace elf { - -template void OutputSectionBase::writeHeaderTo<ELF32LE>(ELF32LE::Shdr *Shdr); -template void OutputSectionBase::writeHeaderTo<ELF32BE>(ELF32BE::Shdr *Shdr); -template void OutputSectionBase::writeHeaderTo<ELF64LE>(ELF64LE::Shdr *Shdr); -template void OutputSectionBase::writeHeaderTo<ELF64BE>(ELF64BE::Shdr *Shdr); - -template class OutputSection<ELF32LE>; -template class OutputSection<ELF32BE>; -template class OutputSection<ELF64LE>; -template class OutputSection<ELF64BE>; - -template class EhOutputSection<ELF32LE>; 
-template class EhOutputSection<ELF32BE>; -template class EhOutputSection<ELF64LE>; -template class EhOutputSection<ELF64BE>; - -template class MergeOutputSection<ELF32LE>; -template class MergeOutputSection<ELF32BE>; -template class MergeOutputSection<ELF64LE>; -template class MergeOutputSection<ELF64BE>; - -template class OutputSectionFactory<ELF32LE>; -template class OutputSectionFactory<ELF32BE>; -template class OutputSectionFactory<ELF64LE>; -template class OutputSectionFactory<ELF64BE>; -} +uint64_t elf::getHeaderSize() { + if (Config->OFormatBinary) + return 0; + return Out::ElfHeader->Size + Out::ProgramHeaders->Size; } + +template void OutputSection::writeHeaderTo<ELF32LE>(ELF32LE::Shdr *Shdr); +template void OutputSection::writeHeaderTo<ELF32BE>(ELF32BE::Shdr *Shdr); +template void OutputSection::writeHeaderTo<ELF64LE>(ELF64LE::Shdr *Shdr); +template void OutputSection::writeHeaderTo<ELF64BE>(ELF64BE::Shdr *Shdr); + +template void OutputSection::finalize<ELF32LE>(); +template void OutputSection::finalize<ELF32BE>(); +template void OutputSection::finalize<ELF64LE>(); +template void OutputSection::finalize<ELF64BE>(); + +template void OutputSection::writeTo<ELF32LE>(uint8_t *Buf); +template void OutputSection::writeTo<ELF32BE>(uint8_t *Buf); +template void OutputSection::writeTo<ELF64LE>(uint8_t *Buf); +template void OutputSection::writeTo<ELF64BE>(uint8_t *Buf); diff --git a/ELF/OutputSections.h b/ELF/OutputSections.h index 5c494bba977a..0ae3df5f7859 100644 --- a/ELF/OutputSections.h +++ b/ELF/OutputSections.h @@ -11,6 +11,7 @@ #define LLD_ELF_OUTPUT_SECTIONS_H #include "Config.h" +#include "InputSection.h" #include "Relocations.h" #include "lld/Core/LLVM.h" @@ -23,49 +24,38 @@ namespace elf { struct PhdrEntry; class SymbolBody; struct EhSectionPiece; -template <class ELFT> class EhInputSection; -template <class ELFT> class InputSection; -template <class ELFT> class InputSectionBase; -template <class ELFT> class MergeInputSection; -template <class ELFT> 
class OutputSection; +class EhInputSection; +class InputSection; +class InputSectionBase; +class MergeInputSection; +class OutputSection; template <class ELFT> class ObjectFile; template <class ELFT> class SharedFile; -template <class ELFT> class SharedSymbol; -template <class ELFT> class DefinedRegular; +class SharedSymbol; +class DefinedRegular; // This represents a section in an output file. -// Different sub classes represent different types of sections. Some contain -// input sections, others are created by the linker. +// It is composed of multiple InputSections. // The writer creates multiple OutputSections and assign them unique, // non-overlapping file offsets and VAs. -class OutputSectionBase { +class OutputSection final : public SectionBase { public: - enum Kind { - Base, - EHFrame, - Merge, - Regular, - }; + OutputSection(StringRef Name, uint32_t Type, uint64_t Flags); + + static bool classof(const SectionBase *S) { + return S->kind() == SectionBase::Output; + } - OutputSectionBase(StringRef Name, uint32_t Type, uint64_t Flags); - void setLMAOffset(uint64_t LMAOff) { LMAOffset = LMAOff; } uint64_t getLMA() const { return Addr + LMAOffset; } template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *SHdr); - StringRef getName() const { return Name; } - - virtual void addSection(InputSectionData *C) {} - virtual Kind getKind() const { return Base; } - static bool classof(const OutputSectionBase *B) { - return B->getKind() == Base; - } unsigned SectionIndex; uint32_t getPhdrFlags() const; - void updateAlignment(uint64_t Alignment) { - if (Alignment > Addralign) - Addralign = Alignment; + void updateAlignment(uint32_t Val) { + if (Val > Alignment) + Alignment = Val; } // If true, this section will be page aligned on disk. @@ -78,191 +68,82 @@ public: // between their file offsets should be equal to difference between their // virtual addresses. To compute some section offset we use the following // formula: Off = Off_first + VA - VA_first. 
- OutputSectionBase *FirstInPtLoad = nullptr; - - virtual void finalize() {} - virtual void assignOffsets() {} - virtual void writeTo(uint8_t *Buf) {} - virtual ~OutputSectionBase() = default; - - StringRef Name; + OutputSection *FirstInPtLoad = nullptr; // The following fields correspond to Elf_Shdr members. uint64_t Size = 0; - uint64_t Entsize = 0; - uint64_t Addralign = 0; uint64_t Offset = 0; - uint64_t Flags = 0; uint64_t LMAOffset = 0; uint64_t Addr = 0; uint32_t ShName = 0; - uint32_t Type = 0; - uint32_t Info = 0; - uint32_t Link = 0; -}; - -template <class ELFT> class OutputSection final : public OutputSectionBase { -public: - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::uint uintX_t; - OutputSection(StringRef Name, uint32_t Type, uintX_t Flags); - void addSection(InputSectionData *C) override; - void sort(std::function<int(InputSection<ELFT> *S)> Order); + void addSection(InputSection *S); + void sort(std::function<int(InputSectionBase *S)> Order); void sortInitFini(); void sortCtorsDtors(); - void writeTo(uint8_t *Buf) override; - void finalize() override; - void assignOffsets() override; - Kind getKind() const override { return Regular; } - static bool classof(const OutputSectionBase *B) { - return B->getKind() == Regular; - } - std::vector<InputSection<ELFT> *> Sections; + uint32_t getFiller(); + template <class ELFT> void writeTo(uint8_t *Buf); + template <class ELFT> void finalize(); + void assignOffsets(); + std::vector<InputSection *> Sections; // Location in the output buffer. 
uint8_t *Loc = nullptr; }; -template <class ELFT> -class MergeOutputSection final : public OutputSectionBase { - typedef typename ELFT::uint uintX_t; - -public: - MergeOutputSection(StringRef Name, uint32_t Type, uintX_t Flags, - uintX_t Alignment); - void addSection(InputSectionData *S) override; - void writeTo(uint8_t *Buf) override; - void finalize() override; - bool shouldTailMerge() const; - Kind getKind() const override { return Merge; } - static bool classof(const OutputSectionBase *B) { - return B->getKind() == Merge; - } - -private: - void finalizeTailMerge(); - void finalizeNoTailMerge(); - - llvm::StringTableBuilder Builder; - std::vector<MergeInputSection<ELFT> *> Sections; -}; - -struct CieRecord { - EhSectionPiece *Piece = nullptr; - std::vector<EhSectionPiece *> FdePieces; -}; - -// Output section for .eh_frame. -template <class ELFT> class EhOutputSection final : public OutputSectionBase { - typedef typename ELFT::uint uintX_t; - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Rela Elf_Rela; - -public: - EhOutputSection(); - void writeTo(uint8_t *Buf) override; - void finalize() override; - bool empty() const { return Sections.empty(); } - - void addSection(InputSectionData *S) override; - Kind getKind() const override { return EHFrame; } - static bool classof(const OutputSectionBase *B) { - return B->getKind() == EHFrame; - } - - size_t NumFdes = 0; - -private: - template <class RelTy> - void addSectionAux(EhInputSection<ELFT> *S, llvm::ArrayRef<RelTy> Rels); - - template <class RelTy> - CieRecord *addCie(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); - - template <class RelTy> - bool isFdeLive(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); - - uintX_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc); - - std::vector<EhInputSection<ELFT> *> Sections; - std::vector<CieRecord *> Cies; - - // CIE records are uniquified by their contents and personality functions. 
- llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap; -}; - -// All output sections that are hadnled by the linker specially are +// All output sections that are handled by the linker specially are // globally accessible. Writer initializes them, so don't use them // until Writer is initialized. -template <class ELFT> struct Out { - typedef typename ELFT::uint uintX_t; - typedef typename ELFT::Phdr Elf_Phdr; - +struct Out { static uint8_t First; - static EhOutputSection<ELFT> *EhFrame; - static OutputSection<ELFT> *Bss; - static OutputSection<ELFT> *BssRelRo; - static OutputSectionBase *Opd; + static OutputSection *Opd; static uint8_t *OpdBuf; static PhdrEntry *TlsPhdr; - static OutputSectionBase *DebugInfo; - static OutputSectionBase *ElfHeader; - static OutputSectionBase *ProgramHeaders; - static OutputSectionBase *PreinitArray; - static OutputSectionBase *InitArray; - static OutputSectionBase *FiniArray; + static OutputSection *DebugInfo; + static OutputSection *ElfHeader; + static OutputSection *ProgramHeaders; + static OutputSection *PreinitArray; + static OutputSection *InitArray; + static OutputSection *FiniArray; }; struct SectionKey { StringRef Name; uint64_t Flags; - uint64_t Alignment; + uint32_t Alignment; +}; +} +} +namespace llvm { +template <> struct DenseMapInfo<lld::elf::SectionKey> { + static lld::elf::SectionKey getEmptyKey(); + static lld::elf::SectionKey getTombstoneKey(); + static unsigned getHashValue(const lld::elf::SectionKey &Val); + static bool isEqual(const lld::elf::SectionKey &LHS, + const lld::elf::SectionKey &RHS); }; +} +namespace lld { +namespace elf { // This class knows how to create an output section for a given // input section. Output section type is determined by various // factors, including input section's sh_flags, sh_type and // linker scripts. 
-template <class ELFT> class OutputSectionFactory { - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::uint uintX_t; - +class OutputSectionFactory { public: - OutputSectionFactory(); + OutputSectionFactory(std::vector<OutputSection *> &OutputSections); ~OutputSectionFactory(); - std::pair<OutputSectionBase *, bool> create(InputSectionBase<ELFT> *C, - StringRef OutsecName); - std::pair<OutputSectionBase *, bool> create(const SectionKey &Key, - InputSectionBase<ELFT> *C); + + void addInputSec(InputSectionBase *IS, StringRef OutsecName); private: - llvm::SmallDenseMap<SectionKey, OutputSectionBase *> Map; + llvm::SmallDenseMap<SectionKey, OutputSection *> Map; + std::vector<OutputSection *> &OutputSections; }; -template <class ELFT> uint64_t getHeaderSize() { - if (Config->OFormatBinary) - return 0; - return Out<ELFT>::ElfHeader->Size + Out<ELFT>::ProgramHeaders->Size; -} +uint64_t getHeaderSize(); -template <class ELFT> uint8_t Out<ELFT>::First; -template <class ELFT> EhOutputSection<ELFT> *Out<ELFT>::EhFrame; -template <class ELFT> OutputSection<ELFT> *Out<ELFT>::Bss; -template <class ELFT> OutputSection<ELFT> *Out<ELFT>::BssRelRo; -template <class ELFT> OutputSectionBase *Out<ELFT>::Opd; -template <class ELFT> uint8_t *Out<ELFT>::OpdBuf; -template <class ELFT> PhdrEntry *Out<ELFT>::TlsPhdr; -template <class ELFT> OutputSectionBase *Out<ELFT>::DebugInfo; -template <class ELFT> OutputSectionBase *Out<ELFT>::ElfHeader; -template <class ELFT> OutputSectionBase *Out<ELFT>::ProgramHeaders; -template <class ELFT> OutputSectionBase *Out<ELFT>::PreinitArray; -template <class ELFT> OutputSectionBase *Out<ELFT>::InitArray; -template <class ELFT> OutputSectionBase *Out<ELFT>::FiniArray; } // namespace elf } // namespace lld diff --git a/ELF/Relocations.cpp b/ELF/Relocations.cpp index cecd11e90790..baef0a2f2257 100644 --- a/ELF/Relocations.cpp +++ b/ELF/Relocations.cpp @@ -43,6 +43,7 @@ #include "Relocations.h" #include "Config.h" +#include "Memory.h" #include 
"OutputSections.h" #include "Strings.h" #include "SymbolTable.h" @@ -52,20 +53,30 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; -static bool refersToGotEntry(RelExpr Expr) { - return isRelExprOneOf<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF, - R_MIPS_GOT_OFF32, R_MIPS_TLSGD, R_MIPS_TLSLD, - R_GOT_PAGE_PC, R_GOT_PC, R_GOT_FROM_END, R_TLSGD, - R_TLSGD_PC, R_TLSDESC, R_TLSDESC_PAGE>(Expr); +// Construct a message in the following format. +// +// >>> defined in /home/alice/src/foo.o +// >>> referenced by bar.c:12 (/home/alice/src/bar.c:12) +// >>> /home/alice/src/bar.o:(.text+0x1) +template <class ELFT> +static std::string getLocation(InputSectionBase &S, const SymbolBody &Sym, + uint64_t Off) { + std::string Msg = + "\n>>> defined in " + toString(Sym.File) + "\n>>> referenced by "; + std::string Src = S.getSrcMsg<ELFT>(Off); + if (!Src.empty()) + Msg += Src + "\n>>> "; + return Msg + S.getObjMsg<ELFT>(Off); } static bool isPreemptible(const SymbolBody &Body, uint32_t Type) { @@ -84,44 +95,92 @@ static bool isPreemptible(const SymbolBody &Body, uint32_t Type) { return Body.isPreemptible(); } -// This function is similar to the `handleTlsRelocation`. ARM and MIPS do not -// support any relaxations for TLS relocations so by factoring out ARM and MIPS +// This function is similar to the `handleTlsRelocation`. MIPS does not +// support any relaxations for TLS relocations so by factoring out MIPS // handling in to the separate function we can simplify the code and do not -// pollute `handleTlsRelocation` by ARM and MIPS `ifs` statements. 
-template <class ELFT, class GOT> -static unsigned handleNoRelaxTlsRelocation( - GOT *Got, uint32_t Type, SymbolBody &Body, InputSectionBase<ELFT> &C, - typename ELFT::uint Offset, typename ELFT::uint Addend, RelExpr Expr) { - typedef typename ELFT::uint uintX_t; - auto addModuleReloc = [](SymbolBody &Body, GOT *Got, uintX_t Off, bool LD) { - // The Dynamic TLS Module Index Relocation can be statically resolved to 1 - // if we know that we are linking an executable. For ARM we resolve the - // relocation when writing the Got. MIPS has a custom Got implementation - // that writes the Module index in directly. - if (!Body.isPreemptible() && !Config->Pic && Config->EMachine == EM_ARM) - Got->Relocations.push_back( - {R_ABS, Target->TlsModuleIndexRel, Off, 0, &Body}); - else { - SymbolBody *Dest = LD ? nullptr : &Body; +// pollute other `handleTlsRelocation` by MIPS `ifs` statements. +// Mips has a custom MipsGotSection that handles the writing of GOT entries +// without dynamic relocations. 
+template <class ELFT> +static unsigned handleMipsTlsRelocation(uint32_t Type, SymbolBody &Body, + InputSectionBase &C, uint64_t Offset, + int64_t Addend, RelExpr Expr) { + if (Expr == R_MIPS_TLSLD) { + if (In<ELFT>::MipsGot->addTlsIndex() && Config->Pic) + In<ELFT>::RelaDyn->addReloc({Target->TlsModuleIndexRel, In<ELFT>::MipsGot, + In<ELFT>::MipsGot->getTlsIndexOff(), false, + nullptr, 0}); + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); + return 1; + } + + if (Expr == R_MIPS_TLSGD) { + if (In<ELFT>::MipsGot->addDynTlsEntry(Body) && Body.isPreemptible()) { + uint64_t Off = In<ELFT>::MipsGot->getGlobalDynOffset(Body); In<ELFT>::RelaDyn->addReloc( - {Target->TlsModuleIndexRel, Got, Off, false, Dest, 0}); + {Target->TlsModuleIndexRel, In<ELFT>::MipsGot, Off, false, &Body, 0}); + if (Body.isPreemptible()) + In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, In<ELFT>::MipsGot, + Off + Config->Wordsize, false, &Body, 0}); } + C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); + return 1; + } + return 0; +} + +// This function is similar to the `handleMipsTlsRelocation`. ARM also does not +// support any relaxations for TLS relocations. ARM is logically similar to Mips +// in how it handles TLS, but Mips uses its own custom GOT which handles some +// of the cases that ARM uses GOT relocations for. +// +// We look for TLS global dynamic and local dynamic relocations, these may +// require the generation of a pair of GOT entries that have associated +// dynamic relocations. When the results of the dynamic relocations can be +// resolved at static link time we do so. This is necessary for static linking +// as there will be no dynamic loader to resolve them at load-time. 
+// +// The pair of GOT entries created are of the form +// GOT[e0] Module Index (Used to find pointer to TLS block at run-time) +// GOT[e1] Offset of symbol in TLS block +template <class ELFT> +static unsigned handleARMTlsRelocation(uint32_t Type, SymbolBody &Body, + InputSectionBase &C, uint64_t Offset, + int64_t Addend, RelExpr Expr) { + // The Dynamic TLS Module Index Relocation for a symbol defined in an + // executable is always 1. If the target Symbol is not preemtible then + // we know the offset into the TLS block at static link time. + bool NeedDynId = Body.isPreemptible() || Config->Shared; + bool NeedDynOff = Body.isPreemptible(); + + auto AddTlsReloc = [&](uint64_t Off, uint32_t Type, SymbolBody *Dest, + bool Dyn) { + if (Dyn) + In<ELFT>::RelaDyn->addReloc({Type, In<ELFT>::Got, Off, false, Dest, 0}); + else + In<ELFT>::Got->Relocations.push_back({R_ABS, Type, Off, 0, Dest}); }; - if (Expr == R_MIPS_TLSLD || Expr == R_TLSLD_PC) { - if (Got->addTlsIndex() && (Config->Pic || Config->EMachine == EM_ARM)) - addModuleReloc(Body, Got, Got->getTlsIndexOff(), true); + + // Local Dynamic is for access to module local TLS variables, while still + // being suitable for being dynamically loaded via dlopen. + // GOT[e0] is the module index, with a special value of 0 for the current + // module. GOT[e1] is unused. There only needs to be one module index entry. + if (Expr == R_TLSLD_PC && In<ELFT>::Got->addTlsIndex()) { + AddTlsReloc(In<ELFT>::Got->getTlsIndexOff(), Target->TlsModuleIndexRel, + NeedDynId ? 
nullptr : &Body, NeedDynId); C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); return 1; } - if (Target->isTlsGlobalDynamicRel(Type)) { - if (Got->addDynTlsEntry(Body) && - (Body.isPreemptible() || Config->EMachine == EM_ARM)) { - uintX_t Off = Got->getGlobalDynOffset(Body); - addModuleReloc(Body, Got, Off, false); - if (Body.isPreemptible()) - In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, Got, - Off + (uintX_t)sizeof(uintX_t), false, - &Body, 0}); + + // Global Dynamic is the most general purpose access model. When we know + // the module index and offset of symbol in TLS block we can fill these in + // using static GOT relocations. + if (Expr == R_TLSGD_PC) { + if (In<ELFT>::Got->addDynTlsEntry(Body)) { + uint64_t Off = In<ELFT>::Got->getGlobalDynOffset(Body); + AddTlsReloc(Off, Target->TlsModuleIndexRel, &Body, NeedDynId); + AddTlsReloc(Off + Config->Wordsize, Target->TlsOffsetRel, &Body, + NeedDynOff); } C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); return 1; @@ -131,30 +190,25 @@ static unsigned handleNoRelaxTlsRelocation( // Returns the number of relocations processed. 
template <class ELFT> -static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, - InputSectionBase<ELFT> &C, - typename ELFT::uint Offset, - typename ELFT::uint Addend, RelExpr Expr) { +static unsigned +handleTlsRelocation(uint32_t Type, SymbolBody &Body, InputSectionBase &C, + typename ELFT::uint Offset, int64_t Addend, RelExpr Expr) { if (!(C.Flags & SHF_ALLOC)) return 0; if (!Body.isTls()) return 0; - typedef typename ELFT::uint uintX_t; - if (Config->EMachine == EM_ARM) - return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::Got, Type, Body, C, - Offset, Addend, Expr); + return handleARMTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr); if (Config->EMachine == EM_MIPS) - return handleNoRelaxTlsRelocation<ELFT>(In<ELFT>::MipsGot, Type, Body, C, - Offset, Addend, Expr); + return handleMipsTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr); bool IsPreemptible = isPreemptible(Body, Type); - if ((Expr == R_TLSDESC || Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC_CALL) && + if (isRelExprOneOf<R_TLSDESC, R_TLSDESC_PAGE, R_TLSDESC_CALL>(Expr) && Config->Shared) { if (In<ELFT>::Got->addDynTlsEntry(Body)) { - uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body); + uint64_t Off = In<ELFT>::Got->getGlobalDynOffset(Body); In<ELFT>::RelaDyn->addReloc({Target->TlsDescRel, In<ELFT>::Got, Off, !IsPreemptible, &Body, 0}); } @@ -163,7 +217,7 @@ static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, return 1; } - if (Expr == R_TLSLD_PC || Expr == R_TLSLD) { + if (isRelExprOneOf<R_TLSLD_PC, R_TLSLD>(Expr)) { // Local-Dynamic relocs can be relaxed to Local-Exec. 
if (!Config->Shared) { C.Relocations.push_back( @@ -185,17 +239,17 @@ static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, return 1; } - if (Expr == R_TLSDESC_PAGE || Expr == R_TLSDESC || Expr == R_TLSDESC_CALL || - Target->isTlsGlobalDynamicRel(Type)) { + if (isRelExprOneOf<R_TLSDESC, R_TLSDESC_PAGE, R_TLSDESC_CALL, R_TLSGD, + R_TLSGD_PC>(Expr)) { if (Config->Shared) { if (In<ELFT>::Got->addDynTlsEntry(Body)) { - uintX_t Off = In<ELFT>::Got->getGlobalDynOffset(Body); + uint64_t Off = In<ELFT>::Got->getGlobalDynOffset(Body); In<ELFT>::RelaDyn->addReloc( {Target->TlsModuleIndexRel, In<ELFT>::Got, Off, false, &Body, 0}); // If the symbol is preemptible we need the dynamic linker to write // the offset too. - uintX_t OffsetOff = Off + (uintX_t)sizeof(uintX_t); + uint64_t OffsetOff = Off + Config->Wordsize; if (IsPreemptible) In<ELFT>::RelaDyn->addReloc({Target->TlsOffsetRel, In<ELFT>::Got, OffsetOff, false, &Body, 0}); @@ -216,14 +270,13 @@ static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, if (!Body.isInGot()) { In<ELFT>::Got->addEntry(Body); In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, In<ELFT>::Got, - Body.getGotOffset<ELFT>(), false, &Body, - 0}); + Body.getGotOffset(), false, &Body, 0}); } - return Target->TlsGdRelaxSkip; + } else { + C.Relocations.push_back( + {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type, + Offset, Addend, &Body}); } - C.Relocations.push_back( - {Target->adjustRelaxExpr(Type, nullptr, R_RELAX_TLS_GD_TO_LE), Type, - Offset, Addend, &Body}); return Target->TlsGdRelaxSkip; } @@ -234,16 +287,14 @@ static unsigned handleTlsRelocation(uint32_t Type, SymbolBody &Body, {R_RELAX_TLS_IE_TO_LE, Type, Offset, Addend, &Body}); return 1; } - return 0; -} -template <endianness E> static int16_t readSignedLo16(const uint8_t *Loc) { - return read32<E>(Loc) & 0xffff; + if (Expr == R_TLSDESC_CALL) + return 1; + return 0; } -template <class RelTy> -static uint32_t getMipsPairType(const RelTy *Rel, const 
SymbolBody &Sym) { - switch (Rel->getType(Config->Mips64EL)) { +static uint32_t getMipsPairType(uint32_t Type, const SymbolBody &Sym) { + switch (Type) { case R_MIPS_HI16: return R_MIPS_LO16; case R_MIPS_GOT16: @@ -257,72 +308,60 @@ static uint32_t getMipsPairType(const RelTy *Rel, const SymbolBody &Sym) { } } -template <class ELFT, class RelTy> -static int32_t findMipsPairedAddend(const uint8_t *Buf, const uint8_t *BufLoc, - SymbolBody &Sym, const RelTy *Rel, - const RelTy *End) { - uint32_t SymIndex = Rel->getSymbol(Config->Mips64EL); - uint32_t Type = getMipsPairType(Rel, Sym); - - // Some MIPS relocations use addend calculated from addend of the relocation - // itself and addend of paired relocation. ABI requires to compute such - // combined addend in case of REL relocation record format only. - // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - if (RelTy::IsRela || Type == R_MIPS_NONE) - return 0; - - for (const RelTy *RI = Rel; RI != End; ++RI) { - if (RI->getType(Config->Mips64EL) != Type) - continue; - if (RI->getSymbol(Config->Mips64EL) != SymIndex) - continue; - const endianness E = ELFT::TargetEndianness; - return ((read32<E>(BufLoc) & 0xffff) << 16) + - readSignedLo16<E>(Buf + RI->r_offset); - } - warn("can't find matching " + toString(Type) + " relocation for " + - toString(Rel->getType(Config->Mips64EL))); - return 0; -} - // True if non-preemptable symbol always has the same value regardless of where // the DSO is loaded. -template <class ELFT> static bool isAbsolute(const SymbolBody &Body) { +static bool isAbsolute(const SymbolBody &Body) { if (Body.isUndefined()) return !Body.isLocal() && Body.symbol()->isWeak(); - if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(&Body)) + if (const auto *DR = dyn_cast<DefinedRegular>(&Body)) return DR->Section == nullptr; // Absolute symbol. 
return false; } -template <class ELFT> static bool isAbsoluteValue(const SymbolBody &Body) { - return isAbsolute<ELFT>(Body) || Body.isTls(); +static bool isAbsoluteValue(const SymbolBody &Body) { + return isAbsolute(Body) || Body.isTls(); } +// Returns true if Expr refers a PLT entry. static bool needsPlt(RelExpr Expr) { - return isRelExprOneOf<R_PLT_PC, R_PPC_PLT_OPD, R_PLT, R_PLT_PAGE_PC, - R_THUNK_PLT_PC>(Expr); + return isRelExprOneOf<R_PLT_PC, R_PPC_PLT_OPD, R_PLT, R_PLT_PAGE_PC>(Expr); +} + +// Returns true if Expr refers a GOT entry. Note that this function +// returns false for TLS variables even though they need GOT, because +// TLS variables uses GOT differently than the regular variables. +static bool needsGot(RelExpr Expr) { + return isRelExprOneOf<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF, + R_MIPS_GOT_OFF32, R_GOT_PAGE_PC, R_GOT_PC, + R_GOT_FROM_END>(Expr); } // True if this expression is of the form Sym - X, where X is a position in the // file (PC, or GOT for example). static bool isRelExpr(RelExpr Expr) { return isRelExprOneOf<R_PC, R_GOTREL, R_GOTREL_FROM_END, R_MIPS_GOTREL, - R_PAGE_PC, R_RELAX_GOT_PC, R_THUNK_PC, R_THUNK_PLT_PC>( - Expr); + R_PAGE_PC, R_RELAX_GOT_PC>(Expr); } +// Returns true if a given relocation can be computed at link-time. +// +// For instance, we know the offset from a relocation to its target at +// link-time if the relocation is PC-relative and refers a +// non-interposable function in the same executable. This function +// will return true for such relocation. +// +// If this function returns false, that means we need to emit a +// dynamic relocation so that the relocation will be fixed at load-time. 
template <class ELFT> static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type, const SymbolBody &Body, - InputSectionBase<ELFT> &S, - typename ELFT::uint RelOff) { + InputSectionBase &S, uint64_t RelOff) { // These expressions always compute a constant if (isRelExprOneOf<R_SIZE, R_GOT_FROM_END, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, - R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_TLSGD, - R_GOT_PAGE_PC, R_GOT_PC, R_PLT_PC, R_TLSGD_PC, R_TLSGD, - R_PPC_PLT_OPD, R_TLSDESC_CALL, R_TLSDESC_PAGE, R_HINT, - R_THUNK_PC, R_THUNK_PLT_PC>(E)) + R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, + R_MIPS_TLSGD, R_GOT_PAGE_PC, R_GOT_PC, R_PLT_PC, + R_TLSGD_PC, R_TLSGD, R_PPC_PLT_OPD, R_TLSDESC_CALL, + R_TLSDESC_PAGE, R_HINT>(E)) return true; // These never do, except if the entire file is position dependent or if @@ -332,16 +371,19 @@ static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type, if (isPreemptible(Body, Type)) return false; - if (!Config->Pic) return true; - bool AbsVal = isAbsoluteValue<ELFT>(Body); + // For the target and the relocation, we want to know if they are + // absolute or relative. + bool AbsVal = isAbsoluteValue(Body); bool RelE = isRelExpr(E); if (AbsVal && !RelE) return true; if (!AbsVal && RelE) return true; + if (!AbsVal && !RelE) + return Target->usesOnlyLowPageBits(Type); // Relative relocation to an absolute value. This is normally unrepresentable, // but if the relocation refers to a weak undefined symbol, we allow it to @@ -351,18 +393,13 @@ static bool isStaticLinkTimeConstant(RelExpr E, uint32_t Type, // Another special case is MIPS _gp_disp symbol which represents offset // between start of a function and '_gp' value and defined as absolute just // to simplify the code. 
- if (AbsVal && RelE) { - if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) - return true; - if (&Body == ElfSym<ELFT>::MipsGpDisp) - return true; - error(S.getLocation(RelOff) + ": relocation " + toString(Type) + - " cannot refer to absolute symbol '" + toString(Body) + - "' defined in " + toString(Body.File)); + assert(AbsVal && RelE); + if (Body.isUndefined() && !Body.isLocal() && Body.symbol()->isWeak()) return true; - } - return Target->usesOnlyLowPageBits(Type); + error("relocation " + toString(Type) + " cannot refer to absolute symbol: " + + toString(Body) + getLocation<ELFT>(S, Body, RelOff)); + return true; } static RelExpr toPlt(RelExpr Expr) { @@ -389,23 +426,14 @@ static RelExpr fromPlt(RelExpr Expr) { return Expr; } -template <class ELFT> static uint32_t getAlignment(SharedSymbol<ELFT> *SS) { - typedef typename ELFT::uint uintX_t; - - uintX_t SecAlign = SS->file()->getSection(SS->Sym)->sh_addralign; - uintX_t SymValue = SS->Sym.st_value; - int TrailingZeros = - std::min(countTrailingZeros(SecAlign), countTrailingZeros(SymValue)); - return 1 << TrailingZeros; -} - -template <class ELFT> static bool isReadOnly(SharedSymbol<ELFT> *SS) { - typedef typename ELFT::uint uintX_t; +// Returns true if a given shared symbol is in a read-only segment in a DSO. +template <class ELFT> static bool isReadOnly(SharedSymbol *SS) { typedef typename ELFT::Phdr Elf_Phdr; + uint64_t Value = SS->getValue<ELFT>(); // Determine if the symbol is read-only by scanning the DSO's program headers. 
- uintX_t Value = SS->Sym.st_value; - for (const Elf_Phdr &Phdr : check(SS->file()->getObj().program_headers())) + auto *File = cast<SharedFile<ELFT>>(SS->File); + for (const Elf_Phdr &Phdr : check(File->getObj().program_headers())) if ((Phdr.p_type == ELF::PT_LOAD || Phdr.p_type == ELF::PT_GNU_RELRO) && !(Phdr.p_flags & ELF::PF_W) && Value >= Phdr.p_vaddr && Value < Phdr.p_vaddr + Phdr.p_memsz) @@ -413,62 +441,112 @@ template <class ELFT> static bool isReadOnly(SharedSymbol<ELFT> *SS) { return false; } -// Reserve space in .bss or .bss.rel.ro for copy relocation. -template <class ELFT> static void addCopyRelSymbol(SharedSymbol<ELFT> *SS) { - typedef typename ELFT::uint uintX_t; +// Returns symbols at the same offset as a given symbol, including SS itself. +// +// If two or more symbols are at the same offset, and at least one of +// them are copied by a copy relocation, all of them need to be copied. +// Otherwise, they would refer different places at runtime. +template <class ELFT> +static std::vector<SharedSymbol *> getSymbolsAt(SharedSymbol *SS) { typedef typename ELFT::Sym Elf_Sym; + auto *File = cast<SharedFile<ELFT>>(SS->File); + uint64_t Shndx = SS->getShndx<ELFT>(); + uint64_t Value = SS->getValue<ELFT>(); + + std::vector<SharedSymbol *> Ret; + for (const Elf_Sym &S : File->getGlobalSymbols()) { + if (S.st_shndx != Shndx || S.st_value != Value) + continue; + StringRef Name = check(S.getName(File->getStringTable())); + SymbolBody *Sym = Symtab<ELFT>::X->find(Name); + if (auto *Alias = dyn_cast_or_null<SharedSymbol>(Sym)) + Ret.push_back(Alias); + } + return Ret; +} + +// Reserve space in .bss or .bss.rel.ro for copy relocation. +// +// The copy relocation is pretty much a hack. If you use a copy relocation +// in your program, not only the symbol name but the symbol's size, RW/RO +// bit and alignment become part of the ABI. In addition to that, if the +// symbol has aliases, the aliases become part of the ABI. 
That's subtle, +// but if you violate that implicit ABI, that can cause very counter- +// intuitive consequences. +// +// So, what is the copy relocation? It's for linking non-position +// independent code to DSOs. In an ideal world, all references to data +// exported by DSOs should go indirectly through GOT. But if object files +// are compiled as non-PIC, all data references are direct. There is no +// way for the linker to transform the code to use GOT, as machine +// instructions are already set in stone in object files. This is where +// the copy relocation takes a role. +// +// A copy relocation instructs the dynamic linker to copy data from a DSO +// to a specified address (which is usually in .bss) at load-time. If the +// static linker (that's us) finds a direct data reference to a DSO +// symbol, it creates a copy relocation, so that the symbol can be +// resolved as if it were in .bss rather than in a DSO. +// +// As you can see in this function, we create a copy relocation for the +// dynamic linker, and the relocation contains not only symbol name but +// various other informtion about the symbol. So, such attributes become a +// part of the ABI. +// +// Note for application developers: I can give you a piece of advice if +// you are writing a shared library. You probably should export only +// functions from your library. You shouldn't export variables. +// +// As an example what can happen when you export variables without knowing +// the semantics of copy relocations, assume that you have an exported +// variable of type T. It is an ABI-breaking change to add new members at +// end of T even though doing that doesn't change the layout of the +// existing members. That's because the space for the new members are not +// reserved in .bss unless you recompile the main program. That means they +// are likely to overlap with other data that happens to be laid out next +// to the variable in .bss. This kind of issue is sometimes very hard to +// debug. 
What's a solution? Instead of exporting a varaible V from a DSO, +// define an accessor getV(). +template <class ELFT> static void addCopyRelSymbol(SharedSymbol *SS) { // Copy relocation against zero-sized symbol doesn't make sense. - uintX_t SymSize = SS->template getSize<ELFT>(); + uint64_t SymSize = SS->template getSize<ELFT>(); if (SymSize == 0) fatal("cannot create a copy relocation for symbol " + toString(*SS)); // See if this symbol is in a read-only segment. If so, preserve the symbol's // memory protection by reserving space in the .bss.rel.ro section. - bool IsReadOnly = isReadOnly(SS); - OutputSection<ELFT> *CopySec = - IsReadOnly ? Out<ELFT>::BssRelRo : Out<ELFT>::Bss; - - uintX_t Alignment = getAlignment(SS); - uintX_t Off = alignTo(CopySec->Size, Alignment); - CopySec->Size = Off + SymSize; - CopySec->updateAlignment(Alignment); - uintX_t Shndx = SS->Sym.st_shndx; - uintX_t Value = SS->Sym.st_value; + bool IsReadOnly = isReadOnly<ELFT>(SS); + BssSection *Sec = IsReadOnly ? In<ELFT>::BssRelRo : In<ELFT>::Bss; + uint64_t Off = Sec->reserveSpace(SymSize, SS->getAlignment<ELFT>()); + // Look through the DSO's dynamic symbol table for aliases and create a // dynamic symbol for each one. This causes the copy relocation to correctly // interpose any aliases. 
- for (const Elf_Sym &S : SS->file()->getGlobalSymbols()) { - if (S.st_shndx != Shndx || S.st_value != Value) - continue; - auto *Alias = dyn_cast_or_null<SharedSymbol<ELFT>>( - Symtab<ELFT>::X->find(check(S.getName(SS->file()->getStringTable())))); - if (!Alias) - continue; - Alias->CopyIsInBssRelRo = IsReadOnly; - Alias->CopyOffset = Off; - Alias->NeedsCopyOrPltAddr = true; - Alias->symbol()->IsUsedInRegularObj = true; + for (SharedSymbol *Sym : getSymbolsAt<ELFT>(SS)) { + Sym->NeedsCopy = true; + Sym->CopyRelSec = Sec; + Sym->CopyRelSecOff = Off; + Sym->symbol()->IsUsedInRegularObj = true; } - In<ELFT>::RelaDyn->addReloc({Target->CopyRel, CopySec, Off, false, SS, 0}); + + In<ELFT>::RelaDyn->addReloc({Target->CopyRel, Sec, Off, false, SS, 0}); } template <class ELFT> -static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body, - bool IsWrite, RelExpr Expr, uint32_t Type, - const uint8_t *Data, InputSectionBase<ELFT> &S, +static RelExpr adjustExpr(SymbolBody &Body, RelExpr Expr, uint32_t Type, + const uint8_t *Data, InputSectionBase &S, typename ELFT::uint RelOff) { - bool Preemptible = isPreemptible(Body, Type); if (Body.isGnuIFunc()) { Expr = toPlt(Expr); - } else if (!Preemptible) { + } else if (!isPreemptible(Body, Type)) { if (needsPlt(Expr)) Expr = fromPlt(Expr); - if (Expr == R_GOT_PC && !isAbsoluteValue<ELFT>(Body)) + if (Expr == R_GOT_PC && !isAbsoluteValue(Body)) Expr = Target->adjustRelaxExpr(Type, Data, Expr); } - Expr = Target->getThunkExpr(Expr, Type, File, Body); + bool IsWrite = !Config->ZText || (S.Flags & SHF_WRITE); if (IsWrite || isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, S, RelOff)) return Expr; @@ -476,25 +554,34 @@ static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body, // only memory. We can hack around it if we are producing an executable and // the refered symbol can be preemepted to refer to the executable. 
if (Config->Shared || (Config->Pic && !isRelExpr(Expr))) { - error(S.getLocation(RelOff) + ": can't create dynamic relocation " + - toString(Type) + " against " + + error("can't create dynamic relocation " + toString(Type) + " against " + (Body.getName().empty() ? "local symbol in readonly segment" - : "symbol '" + toString(Body) + "'") + - " defined in " + toString(Body.File)); + : "symbol: " + toString(Body)) + + getLocation<ELFT>(S, Body, RelOff)); return Expr; } + if (Body.getVisibility() != STV_DEFAULT) { - error(S.getLocation(RelOff) + ": cannot preempt symbol '" + toString(Body) + - "' defined in " + toString(Body.File)); + error("cannot preempt symbol: " + toString(Body) + + getLocation<ELFT>(S, Body, RelOff)); return Expr; } + if (Body.isObject()) { // Produce a copy relocation. - auto *B = cast<SharedSymbol<ELFT>>(&Body); - if (!B->needsCopy()) - addCopyRelSymbol(B); + auto *B = cast<SharedSymbol>(&Body); + if (!B->NeedsCopy) { + if (Config->ZNocopyreloc) + error("unresolvable relocation " + toString(Type) + + " against symbol '" + toString(*B) + + "'; recompile with -fPIC or remove '-z nocopyreloc'" + + getLocation<ELFT>(S, Body, RelOff)); + + addCopyRelSymbol<ELFT>(B); + } return Expr; } + if (Body.isFunc()) { // This handles a non PIC program call to function in a shared library. In // an ideal world, we could just report an error saying the relocation can @@ -516,61 +603,109 @@ static RelExpr adjustExpr(const elf::ObjectFile<ELFT> &File, SymbolBody &Body, // that points to the real function is a dedicated got entry used by the // plt. That is identified by special relocation types (R_X86_64_JUMP_SLOT, // R_386_JMP_SLOT, etc). 
- Body.NeedsCopyOrPltAddr = true; + Body.NeedsPltAddr = true; return toPlt(Expr); } - error("symbol '" + toString(Body) + "' defined in " + toString(Body.File) + - " is missing type"); + error("symbol '" + toString(Body) + "' defined in " + toString(Body.File) + + " has no type"); return Expr; } +// Returns an addend of a given relocation. If it is RELA, an addend +// is in a relocation itself. If it is REL, we need to read it from an +// input section. +template <class ELFT, class RelTy> +static int64_t computeAddend(const RelTy &Rel, const uint8_t *Buf) { + uint32_t Type = Rel.getType(Config->IsMips64EL); + int64_t A = RelTy::IsRela + ? getAddend<ELFT>(Rel) + : Target->getImplicitAddend(Buf + Rel.r_offset, Type); + + if (Config->EMachine == EM_PPC64 && Config->Pic && Type == R_PPC64_TOC) + A += getPPC64TocBase(); + return A; +} + +// MIPS has an odd notion of "paired" relocations to calculate addends. +// For example, if a relocation is of R_MIPS_HI16, there must be a +// R_MIPS_LO16 relocation after that, and an addend is calculated using +// the two relocations. template <class ELFT, class RelTy> -static typename ELFT::uint computeAddend(const elf::ObjectFile<ELFT> &File, - const uint8_t *SectionData, - const RelTy *End, const RelTy &RI, - RelExpr Expr, SymbolBody &Body) { - typedef typename ELFT::uint uintX_t; - - uint32_t Type = RI.getType(Config->Mips64EL); - uintX_t Addend = getAddend<ELFT>(RI); - const uint8_t *BufLoc = SectionData + RI.r_offset; - if (!RelTy::IsRela) - Addend += Target->getImplicitAddend(BufLoc, Type); - if (Config->EMachine == EM_MIPS) { - Addend += findMipsPairedAddend<ELFT>(SectionData, BufLoc, Body, &RI, End); - if (Type == R_MIPS_LO16 && Expr == R_PC) - // R_MIPS_LO16 expression has R_PC type iif the target is _gp_disp - // symbol. In that case we should use the following formula for - // calculation "AHL + GP - P + 4". Let's add 4 right here. - // For details see p. 
4-19 at - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - Addend += 4; - if (Expr == R_MIPS_GOTREL && Body.isLocal()) - Addend += File.MipsGp0; +static int64_t computeMipsAddend(const RelTy &Rel, InputSectionBase &Sec, + RelExpr Expr, SymbolBody &Body, + const RelTy *End) { + if (Expr == R_MIPS_GOTREL && Body.isLocal()) + return Sec.getFile<ELFT>()->MipsGp0; + + // The ABI says that the paired relocation is used only for REL. + // See p. 4-17 at ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + if (RelTy::IsRela) + return 0; + + uint32_t Type = Rel.getType(Config->IsMips64EL); + uint32_t PairTy = getMipsPairType(Type, Body); + if (PairTy == R_MIPS_NONE) + return 0; + + const uint8_t *Buf = Sec.Data.data(); + uint32_t SymIndex = Rel.getSymbol(Config->IsMips64EL); + + // To make things worse, paired relocations might not be contiguous in + // the relocation table, so we need to do linear search. *sigh* + for (const RelTy *RI = &Rel; RI != End; ++RI) { + if (RI->getType(Config->IsMips64EL) != PairTy) + continue; + if (RI->getSymbol(Config->IsMips64EL) != SymIndex) + continue; + + endianness E = Config->Endianness; + int32_t Hi = (read32(Buf + Rel.r_offset, E) & 0xffff) << 16; + int32_t Lo = SignExtend32<16>(read32(Buf + RI->r_offset, E)); + return Hi + Lo; } - if (Config->Pic && Config->EMachine == EM_PPC64 && Type == R_PPC64_TOC) - Addend += getPPC64TocBase(); - return Addend; + + warn("can't find matching " + toString(PairTy) + " relocation for " + + toString(Type)); + return 0; } template <class ELFT> -static void reportUndefined(SymbolBody &Sym, InputSectionBase<ELFT> &S, - typename ELFT::uint Offset) { - if (Config->UnresolvedSymbols == UnresolvedPolicy::Ignore) +static void reportUndefined(SymbolBody &Sym, InputSectionBase &S, + uint64_t Offset) { + if (Config->UnresolvedSymbols == UnresolvedPolicy::IgnoreAll) return; - if (Config->Shared && Sym.symbol()->Visibility == STV_DEFAULT && - Config->UnresolvedSymbols != 
UnresolvedPolicy::NoUndef) + bool CanBeExternal = Sym.symbol()->computeBinding() != STB_LOCAL && + Sym.getVisibility() == STV_DEFAULT; + if (Config->UnresolvedSymbols == UnresolvedPolicy::Ignore && CanBeExternal) return; std::string Msg = - S.getLocation(Offset) + ": undefined symbol '" + toString(Sym) + "'"; + "undefined symbol: " + toString(Sym) + "\n>>> referenced by "; - if (Config->UnresolvedSymbols == UnresolvedPolicy::Warn) + std::string Src = S.getSrcMsg<ELFT>(Offset); + if (!Src.empty()) + Msg += Src + "\n>>> "; + Msg += S.getObjMsg<ELFT>(Offset); + + if (Config->UnresolvedSymbols == UnresolvedPolicy::WarnAll || + (Config->UnresolvedSymbols == UnresolvedPolicy::Warn && CanBeExternal)) { warn(Msg); - else + } else { error(Msg); + + if (Config->ArchiveWithoutSymbolsSeen) { + message("At least one archive listed no symbols in its index." + " This can happen when creating archives with a version" + " of ar that does not understand the object files in" + " the archive. For example, if you are using LLVM" + " bitcode objects (such as created by -flto), you may" + " need to use llvm-ar or GNU ar with a plugin."); + // Reset to false so that we print the message only once. + Config->ArchiveWithoutSymbolsSeen = false; + } + } } template <class RelTy> @@ -584,11 +719,95 @@ mergeMipsN32RelTypes(uint32_t Type, uint32_t Offset, RelTy *I, RelTy *E) { uint32_t Processed = 0; for (; I != E && Offset == I->r_offset; ++I) { ++Processed; - Type |= I->getType(Config->Mips64EL) << (8 * Processed); + Type |= I->getType(Config->IsMips64EL) << (8 * Processed); } return std::make_pair(Type, Processed); } +// .eh_frame sections are mergeable input sections, so their input +// offsets are not linearly mapped to output section. For each input +// offset, we need to find a section piece containing the offset and +// add the piece's base address to the input offset to compute the +// output offset. That isn't cheap. +// +// This class is to speed up the offset computation. 
When we process +// relocations, we access offsets in the monotonically increasing +// order. So we can optimize for that access pattern. +// +// For sections other than .eh_frame, this class doesn't do anything. +namespace { +class OffsetGetter { +public: + explicit OffsetGetter(InputSectionBase &Sec) { + if (auto *Eh = dyn_cast<EhInputSection>(&Sec)) { + P = Eh->Pieces; + Size = Eh->Pieces.size(); + } + } + + // Translates offsets in input sections to offsets in output sections. + // Given offset must increase monotonically. We assume that P is + // sorted by InputOff. + uint64_t get(uint64_t Off) { + if (P.empty()) + return Off; + + while (I != Size && P[I].InputOff + P[I].size() <= Off) + ++I; + if (I == Size) + return Off; + + // P must be contiguous, so there must be no holes in between. + assert(P[I].InputOff <= Off && "Relocation not in any piece"); + + // Offset -1 means that the piece is dead (i.e. garbage collected). + if (P[I].OutputOff == -1) + return -1; + return P[I].OutputOff + Off - P[I].InputOff; + } + +private: + ArrayRef<EhSectionPiece> P; + size_t I = 0; + size_t Size; +}; +} // namespace + +template <class ELFT, class GotPltSection> +static void addPltEntry(PltSection *Plt, GotPltSection *GotPlt, + RelocationSection<ELFT> *Rel, uint32_t Type, + SymbolBody &Sym, bool UseSymVA) { + Plt->addEntry<ELFT>(Sym); + GotPlt->addEntry(Sym); + Rel->addReloc({Type, GotPlt, Sym.getGotPltOffset(), UseSymVA, &Sym, 0}); +} + +template <class ELFT> +static void addGotEntry(SymbolBody &Sym, bool Preemptible) { + In<ELFT>::Got->addEntry(Sym); + + uint64_t Off = Sym.getGotOffset(); + uint32_t DynType; + RelExpr Expr = R_ABS; + + if (Sym.isTls()) { + DynType = Target->TlsGotRel; + Expr = R_TLS; + } else if (!Preemptible && Config->Pic && !isAbsolute(Sym)) { + DynType = Target->RelativeRel; + } else { + DynType = Target->GotRel; + } + + bool Constant = !Preemptible && !(Config->Pic && !isAbsolute(Sym)); + if (!Constant) + In<ELFT>::RelaDyn->addReloc( + {DynType, 
In<ELFT>::Got, Off, !Preemptible, &Sym, 0}); + + if (Constant || (!Config->IsRela && !Preemptible)) + In<ELFT>::Got->Relocations.push_back({Expr, DynType, Off, 0, &Sym}); +} + // The reason we have to do this early scan is as follows // * To mmap the output file, we need to know the size // * For that, we need to know how many dynamic relocs we will have. @@ -603,114 +822,104 @@ mergeMipsN32RelTypes(uint32_t Type, uint32_t Offset, RelTy *I, RelTy *E) { // complicates things for the dynamic linker and means we would have to reserve // space for the extra PT_LOAD even if we end up not using it. template <class ELFT, class RelTy> -static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { - typedef typename ELFT::uint uintX_t; - - bool IsWrite = C.Flags & SHF_WRITE; - - auto AddDyn = [=](const DynamicReloc<ELFT> &Reloc) { - In<ELFT>::RelaDyn->addReloc(Reloc); - }; - - const elf::ObjectFile<ELFT> *File = C.getFile(); - ArrayRef<uint8_t> SectionData = C.Data; - const uint8_t *Buf = SectionData.begin(); +static void scanRelocs(InputSectionBase &Sec, ArrayRef<RelTy> Rels) { + OffsetGetter GetOffset(Sec); - ArrayRef<EhSectionPiece> Pieces; - if (auto *Eh = dyn_cast<EhInputSection<ELFT>>(&C)) - Pieces = Eh->Pieces; - - ArrayRef<EhSectionPiece>::iterator PieceI = Pieces.begin(); - ArrayRef<EhSectionPiece>::iterator PieceE = Pieces.end(); - - for (auto I = Rels.begin(), E = Rels.end(); I != E; ++I) { - const RelTy &RI = *I; - SymbolBody &Body = File->getRelocTargetSym(RI); - uint32_t Type = RI.getType(Config->Mips64EL); + for (auto I = Rels.begin(), End = Rels.end(); I != End; ++I) { + const RelTy &Rel = *I; + SymbolBody &Body = Sec.getFile<ELFT>()->getRelocTargetSym(Rel); + uint32_t Type = Rel.getType(Config->IsMips64EL); if (Config->MipsN32Abi) { uint32_t Processed; std::tie(Type, Processed) = - mergeMipsN32RelTypes(Type, RI.r_offset, I + 1, E); + mergeMipsN32RelTypes(Type, Rel.r_offset, I + 1, End); I += Processed; } - // We only report undefined symbols if 
they are referenced somewhere in the - // code. + // Compute the offset of this section in the output section. + uint64_t Offset = GetOffset.get(Rel.r_offset); + if (Offset == uint64_t(-1)) + continue; + + // Report undefined symbols. The fact that we report undefined + // symbols here means that we report undefined symbols only when + // they have relocations pointing to them. We don't care about + // undefined symbols that are in dead-stripped sections. if (!Body.isLocal() && Body.isUndefined() && !Body.symbol()->isWeak()) - reportUndefined(Body, C, RI.r_offset); + reportUndefined<ELFT>(Body, Sec, Rel.r_offset); + + RelExpr Expr = + Target->getRelExpr(Type, Body, Sec.Data.begin() + Rel.r_offset); + + // Ignore "hint" relocations because they are only markers for relaxation. + if (isRelExprOneOf<R_HINT, R_NONE>(Expr)) + continue; - RelExpr Expr = Target->getRelExpr(Type, Body); bool Preemptible = isPreemptible(Body, Type); - Expr = adjustExpr(*File, Body, IsWrite, Expr, Type, Buf + RI.r_offset, C, - RI.r_offset); + Expr = adjustExpr<ELFT>(Body, Expr, Type, Sec.Data.data() + Rel.r_offset, + Sec, Rel.r_offset); if (ErrorCount) continue; - // Skip a relocation that points to a dead piece - // in a eh_frame section. - while (PieceI != PieceE && - (PieceI->InputOff + PieceI->size() <= RI.r_offset)) - ++PieceI; - - // Compute the offset of this section in the output section. We do it here - // to try to compute it only once. - uintX_t Offset; - if (PieceI != PieceE) { - assert(PieceI->InputOff <= RI.r_offset && "Relocation not in any piece"); - if (PieceI->OutputOff == -1) - continue; - Offset = PieceI->OutputOff + RI.r_offset - PieceI->InputOff; - } else { - Offset = RI.r_offset; - } - // This relocation does not require got entry, but it is relative to got and // needs it to be created. Here we request for that. 
- if (Expr == R_GOTONLY_PC || Expr == R_GOTONLY_PC_FROM_END || - Expr == R_GOTREL || Expr == R_GOTREL_FROM_END || Expr == R_PPC_TOC) + if (isRelExprOneOf<R_GOTONLY_PC, R_GOTONLY_PC_FROM_END, R_GOTREL, + R_GOTREL_FROM_END, R_PPC_TOC>(Expr)) In<ELFT>::Got->HasGotOffRel = true; - uintX_t Addend = computeAddend(*File, Buf, E, RI, Expr, Body); + // Read an addend. + int64_t Addend = computeAddend<ELFT>(Rel, Sec.Data.data()); + if (Config->EMachine == EM_MIPS) + Addend += computeMipsAddend<ELFT>(Rel, Sec, Expr, Body, End); + // Process some TLS relocations, including relaxing TLS relocations. + // Note that this function does not handle all TLS relocations. if (unsigned Processed = - handleTlsRelocation<ELFT>(Type, Body, C, Offset, Addend, Expr)) { + handleTlsRelocation<ELFT>(Type, Body, Sec, Offset, Addend, Expr)) { I += (Processed - 1); continue; } - // Ignore "hint" and TLS Descriptor call relocation because they are - // only markers for relaxation. - if (isRelExprOneOf<R_HINT, R_TLSDESC_CALL>(Expr)) - continue; + // If a relocation needs PLT, we create PLT and GOTPLT slots for the symbol. + if (needsPlt(Expr) && !Body.isInPlt()) { + if (Body.isGnuIFunc() && !Preemptible) + addPltEntry(InX::Iplt, In<ELFT>::IgotPlt, In<ELFT>::RelaIplt, + Target->IRelativeRel, Body, true); + else + addPltEntry(InX::Plt, In<ELFT>::GotPlt, In<ELFT>::RelaPlt, + Target->PltRel, Body, !Preemptible); + } - if (needsPlt(Expr) || - isRelExprOneOf<R_THUNK_ABS, R_THUNK_PC, R_THUNK_PLT_PC>(Expr) || - refersToGotEntry(Expr) || !isPreemptible(Body, Type)) { - // If the relocation points to something in the file, we can process it. - bool Constant = - isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, C, RI.r_offset); - - // If the output being produced is position independent, the final value - // is still not known. In that case we still need some help from the - // dynamic linker. We can however do better than just copying the incoming - // relocation. 
We can process some of it and and just ask the dynamic - // linker to add the load address. - if (!Constant) - AddDyn({Target->RelativeRel, &C, Offset, true, &Body, Addend}); - - // If the produced value is a constant, we just remember to write it - // when outputting this section. We also have to do it if the format - // uses Elf_Rel, since in that case the written value is the addend. - if (Constant || !RelTy::IsRela) - C.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); - } else { + // Create a GOT slot if a relocation needs GOT. + if (needsGot(Expr)) { + if (Config->EMachine == EM_MIPS) { + // MIPS ABI has special rules to process GOT entries and doesn't + // require relocation entries for them. A special case is TLS + // relocations. In that case dynamic loader applies dynamic + // relocations to initialize TLS GOT entries. + // See "Global Offset Table" in Chapter 5 in the following document + // for detailed description: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + In<ELFT>::MipsGot->addEntry(Body, Addend, Expr); + if (Body.isTls() && Body.isPreemptible()) + In<ELFT>::RelaDyn->addReloc({Target->TlsGotRel, In<ELFT>::MipsGot, + Body.getGotOffset(), false, &Body, 0}); + } else if (!Body.isInGot()) { + addGotEntry<ELFT>(Body, Preemptible); + } + } + + if (!needsPlt(Expr) && !needsGot(Expr) && isPreemptible(Body, Type)) { // We don't know anything about the finaly symbol. Just ask the dynamic // linker to handle the relocation for us. 
if (!Target->isPicRel(Type)) - error(C.getLocation(Offset) + ": relocation " + toString(Type) + - " cannot be used against shared object; recompile with -fPIC."); - AddDyn({Target->getDynRel(Type), &C, Offset, false, &Body, Addend}); + error("relocation " + toString(Type) + + " cannot be used against shared object; recompile with -fPIC" + + getLocation<ELFT>(Sec, Body, Offset)); + + In<ELFT>::RelaDyn->addReloc( + {Target->getDynRel(Type), &Sec, Offset, false, &Body, Addend}); // MIPS ABI turns using of GOT and dynamic relocations inside out. // While regular ABI uses dynamic relocations to fill up GOT entries @@ -732,114 +941,163 @@ static void scanRelocs(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { continue; } - // At this point we are done with the relocated position. Some relocations - // also require us to create a got or plt entry. - - // If a relocation needs PLT, we create a PLT and a GOT slot for the symbol. - if (needsPlt(Expr)) { - if (Body.isInPlt()) - continue; - - if (Body.isGnuIFunc() && !Preemptible) { - In<ELFT>::Iplt->addEntry(Body); - In<ELFT>::IgotPlt->addEntry(Body); - In<ELFT>::RelaIplt->addReloc({Target->IRelativeRel, In<ELFT>::IgotPlt, - Body.getGotPltOffset<ELFT>(), - !Preemptible, &Body, 0}); - } else { - In<ELFT>::Plt->addEntry(Body); - In<ELFT>::GotPlt->addEntry(Body); - In<ELFT>::RelaPlt->addReloc({Target->PltRel, In<ELFT>::GotPlt, - Body.getGotPltOffset<ELFT>(), !Preemptible, - &Body, 0}); - } - continue; - } + // If the relocation points to something in the file, we can process it. + bool IsConstant = + isStaticLinkTimeConstant<ELFT>(Expr, Type, Body, Sec, Rel.r_offset); - if (refersToGotEntry(Expr)) { - if (Config->EMachine == EM_MIPS) { - // MIPS ABI has special rules to process GOT entries and doesn't - // require relocation entries for them. A special case is TLS - // relocations. In that case dynamic loader applies dynamic - // relocations to initialize TLS GOT entries. 
- // See "Global Offset Table" in Chapter 5 in the following document - // for detailed description: - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - In<ELFT>::MipsGot->addEntry(Body, Addend, Expr); - if (Body.isTls() && Body.isPreemptible()) - AddDyn({Target->TlsGotRel, In<ELFT>::MipsGot, - Body.getGotOffset<ELFT>(), false, &Body, 0}); - continue; - } + // If the output being produced is position independent, the final value + // is still not known. In that case we still need some help from the + // dynamic linker. We can however do better than just copying the incoming + // relocation. We can process some of it and and just ask the dynamic + // linker to add the load address. + if (!IsConstant) + In<ELFT>::RelaDyn->addReloc( + {Target->RelativeRel, &Sec, Offset, true, &Body, Addend}); - if (Body.isInGot()) - continue; - - In<ELFT>::Got->addEntry(Body); - uintX_t Off = Body.getGotOffset<ELFT>(); - uint32_t DynType; - RelExpr GotRE = R_ABS; - if (Body.isTls()) { - DynType = Target->TlsGotRel; - GotRE = R_TLS; - } else if (!Preemptible && Config->Pic && !isAbsolute<ELFT>(Body)) - DynType = Target->RelativeRel; - else - DynType = Target->GotRel; - - // FIXME: this logic is almost duplicated above. - bool Constant = !Preemptible && !(Config->Pic && !isAbsolute<ELFT>(Body)); - if (!Constant) - AddDyn({DynType, In<ELFT>::Got, Off, !Preemptible, &Body, 0}); - if (Constant || (!RelTy::IsRela && !Preemptible)) - In<ELFT>::Got->Relocations.push_back({GotRE, DynType, Off, 0, &Body}); - continue; - } + // If the produced value is a constant, we just remember to write it + // when outputting this section. We also have to do it if the format + // uses Elf_Rel, since in that case the written value is the addend. 
+ if (IsConstant || !RelTy::IsRela) + Sec.Relocations.push_back({Expr, Type, Offset, Addend, &Body}); } } -template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &S) { +template <class ELFT> void elf::scanRelocations(InputSectionBase &S) { if (S.AreRelocsRela) - scanRelocs(S, S.relas()); + scanRelocs<ELFT>(S, S.relas<ELFT>()); else - scanRelocs(S, S.rels()); + scanRelocs<ELFT>(S, S.rels<ELFT>()); } -template <class ELFT, class RelTy> -static void createThunks(InputSectionBase<ELFT> &C, ArrayRef<RelTy> Rels) { - const elf::ObjectFile<ELFT> *File = C.getFile(); - for (const RelTy &Rel : Rels) { - SymbolBody &Body = File->getRelocTargetSym(Rel); - uint32_t Type = Rel.getType(Config->Mips64EL); - RelExpr Expr = Target->getRelExpr(Type, Body); - if (!isPreemptible(Body, Type) && needsPlt(Expr)) - Expr = fromPlt(Expr); - Expr = Target->getThunkExpr(Expr, Type, *File, Body); - // Some targets might require creation of thunks for relocations. - // Now we support only MIPS which requires LA25 thunk to call PIC - // code from non-PIC one, and ARM which requires interworking. - if (Expr == R_THUNK_ABS || Expr == R_THUNK_PC || Expr == R_THUNK_PLT_PC) { - auto *Sec = cast<InputSection<ELFT>>(&C); - addThunk<ELFT>(Type, Body, *Sec); +// Insert the Thunks for OutputSection OS into their designated place +// in the Sections vector, and recalculate the InputSection output section +// offsets. 
+// This may invalidate any output section offsets stored outside of InputSection +template <class ELFT> +void ThunkCreator<ELFT>::mergeThunks(OutputSection *OS, + std::vector<ThunkSection *> &Thunks) { + // Order Thunks in ascending OutSecOff + auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) { + return A->OutSecOff < B->OutSecOff; + }; + std::stable_sort(Thunks.begin(), Thunks.end(), ThunkCmp); + + // Merge sorted vectors of Thunks and InputSections by OutSecOff + std::vector<InputSection *> Tmp; + Tmp.reserve(OS->Sections.size() + Thunks.size()); + auto MergeCmp = [](const InputSection *A, const InputSection *B) { + // std::merge requires a strict weak ordering. + if (A->OutSecOff < B->OutSecOff) + return true; + if (A->OutSecOff == B->OutSecOff) + // Check if Thunk is immediately before any specific Target InputSection + // for example Mips LA25 Thunks. + if (auto *TA = dyn_cast<ThunkSection>(A)) + if (TA && TA->getTargetInputSection() == B) + return true; + return false; + }; + std::merge(OS->Sections.begin(), OS->Sections.end(), Thunks.begin(), + Thunks.end(), std::back_inserter(Tmp), MergeCmp); + OS->Sections = std::move(Tmp); + OS->assignOffsets(); +} + +template <class ELFT> +ThunkSection *ThunkCreator<ELFT>::getOSThunkSec(ThunkSection *&TS, + OutputSection *OS) { + if (TS == nullptr) { + uint32_t Off = 0; + for (auto *IS : OS->Sections) { + Off = IS->OutSecOff + IS->getSize(); + if ((IS->Flags & SHF_EXECINSTR) == 0) + break; } + TS = make<ThunkSection>(OS, Off); + ThunkSections[OS].push_back(TS); } + return TS; } -template <class ELFT> void createThunks(InputSectionBase<ELFT> &S) { - if (S.AreRelocsRela) - createThunks(S, S.relas()); - else - createThunks(S, S.rels()); +template <class ELFT> +ThunkSection *ThunkCreator<ELFT>::getISThunkSec(InputSection *IS, + OutputSection *OS) { + ThunkSection *TS = ThunkedSections.lookup(IS); + if (TS) + return TS; + auto *TOS = cast<OutputSection>(IS->OutSec); + TS = make<ThunkSection>(TOS, 
IS->OutSecOff); + ThunkSections[TOS].push_back(TS); + ThunkedSections[IS] = TS; + return TS; } -template void scanRelocations<ELF32LE>(InputSectionBase<ELF32LE> &); -template void scanRelocations<ELF32BE>(InputSectionBase<ELF32BE> &); -template void scanRelocations<ELF64LE>(InputSectionBase<ELF64LE> &); -template void scanRelocations<ELF64BE>(InputSectionBase<ELF64BE> &); - -template void createThunks<ELF32LE>(InputSectionBase<ELF32LE> &); -template void createThunks<ELF32BE>(InputSectionBase<ELF32BE> &); -template void createThunks<ELF64LE>(InputSectionBase<ELF64LE> &); -template void createThunks<ELF64BE>(InputSectionBase<ELF64BE> &); +template <class ELFT> +std::pair<Thunk *, bool> ThunkCreator<ELFT>::getThunk(SymbolBody &Body, + uint32_t Type) { + auto res = ThunkedSymbols.insert({&Body, nullptr}); + if (res.second) + res.first->second = addThunk<ELFT>(Type, Body); + return std::make_pair(res.first->second, res.second); } + +// Process all relocations from the InputSections that have been assigned +// to OutputSections and redirect through Thunks if needed. +// +// createThunks must be called after scanRelocs has created the Relocations for +// each InputSection. It must be called before the static symbol table is +// finalized. If any Thunks are added to an OutputSection the output section +// offsets of the InputSections will change. +// +// FIXME: All Thunks are assumed to be in range of the relocation. Range +// extension Thunks are not yet supported. +template <class ELFT> +bool ThunkCreator<ELFT>::createThunks( + ArrayRef<OutputSection *> OutputSections) { + // Create all the Thunks and insert them into synthetic ThunkSections. The + // ThunkSections are later inserted back into the OutputSection. + + // We separate the creation of ThunkSections from the insertion of the + // ThunkSections back into the OutputSection as ThunkSections are not always + // inserted into the same OutputSection as the caller. 
+ for (OutputSection *OS : OutputSections) { + ThunkSection *OSTS = nullptr; + for (InputSection *IS : OS->Sections) { + for (Relocation &Rel : IS->Relocations) { + SymbolBody &Body = *Rel.Sym; + if (!Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) + continue; + Thunk *T; + bool IsNew; + std::tie(T, IsNew) = getThunk(Body, Rel.Type); + if (IsNew) { + // Find or create a ThunkSection for the new Thunk + ThunkSection *TS; + if (auto *TIS = T->getTargetInputSection()) + TS = getISThunkSec(TIS, OS); + else + TS = getOSThunkSec(OSTS, OS); + TS->addThunk(T); + } + // Redirect relocation to Thunk, we never go via the PLT to a Thunk + Rel.Sym = T->ThunkSym; + Rel.Expr = fromPlt(Rel.Expr); + } + } + } + + // Merge all created synthetic ThunkSections back into OutputSection + for (auto &KV : ThunkSections) + mergeThunks(KV.first, KV.second); + return !ThunkSections.empty(); } + +template void elf::scanRelocations<ELF32LE>(InputSectionBase &); +template void elf::scanRelocations<ELF32BE>(InputSectionBase &); +template void elf::scanRelocations<ELF64LE>(InputSectionBase &); +template void elf::scanRelocations<ELF64BE>(InputSectionBase &); + +template class elf::ThunkCreator<ELF32LE>; +template class elf::ThunkCreator<ELF32BE>; +template class elf::ThunkCreator<ELF64LE>; +template class elf::ThunkCreator<ELF64BE>; diff --git a/ELF/Relocations.h b/ELF/Relocations.h index b5825bdd5e59..f8f0f11e14a9 100644 --- a/ELF/Relocations.h +++ b/ELF/Relocations.h @@ -11,13 +11,16 @@ #define LLD_ELF_RELOCATIONS_H #include "lld/Core/LLVM.h" +#include "llvm/ADT/DenseMap.h" +#include <map> +#include <vector> namespace lld { namespace elf { class SymbolBody; -class InputSectionData; -template <class ELFT> class InputSection; -template <class ELFT> class InputSectionBase; +class InputSection; +class InputSectionBase; +class OutputSection; // List of target-independent relocation types. 
Relocations read // from files are converted to these types so that the main code @@ -34,39 +37,39 @@ enum RelExpr { R_GOT_PAGE_PC, R_GOT_PC, R_HINT, + R_MIPS_GOTREL, + R_MIPS_GOT_GP, + R_MIPS_GOT_GP_PC, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, - R_MIPS_GOTREL, R_MIPS_TLSGD, R_MIPS_TLSLD, R_NEG_TLS, + R_NONE, R_PAGE_PC, R_PC, R_PLT, - R_PLT_PC, R_PLT_PAGE_PC, + R_PLT_PC, R_PPC_OPD, R_PPC_PLT_OPD, R_PPC_TOC, R_RELAX_GOT_PC, R_RELAX_GOT_PC_NOPIC, R_RELAX_TLS_GD_TO_IE, - R_RELAX_TLS_GD_TO_IE_END, R_RELAX_TLS_GD_TO_IE_ABS, + R_RELAX_TLS_GD_TO_IE_END, R_RELAX_TLS_GD_TO_IE_PAGE_PC, R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_LE_NEG, R_RELAX_TLS_IE_TO_LE, R_RELAX_TLS_LD_TO_LE, R_SIZE, - R_THUNK_ABS, - R_THUNK_PC, - R_THUNK_PLT_PC, R_TLS, R_TLSDESC, - R_TLSDESC_PAGE, R_TLSDESC_CALL, + R_TLSDESC_PAGE, R_TLSGD, R_TLSGD_PC, R_TLSLD, @@ -107,21 +110,44 @@ struct Relocation { RelExpr Expr; uint32_t Type; uint64_t Offset; - uint64_t Addend; + int64_t Addend; SymbolBody *Sym; }; -template <class ELFT> void scanRelocations(InputSectionBase<ELFT> &); +template <class ELFT> void scanRelocations(InputSectionBase &); -template <class ELFT> void createThunks(InputSectionBase<ELFT> &); +class ThunkSection; +class Thunk; +template <class ELFT> class ThunkCreator { +public: + // Return true if Thunks have been added to OutputSections + bool createThunks(ArrayRef<OutputSection *> OutputSections); + +private: + void mergeThunks(OutputSection *OS, std::vector<ThunkSection *> &Thunks); + ThunkSection *getOSThunkSec(ThunkSection *&TS, OutputSection *OS); + ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS); + std::pair<Thunk *, bool> getThunk(SymbolBody &Body, uint32_t Type); + + // Track Symbols that already have a Thunk + llvm::DenseMap<SymbolBody *, Thunk *> ThunkedSymbols; + + // Track InputSections that have a ThunkSection placed in front + llvm::DenseMap<InputSection *, ThunkSection *> ThunkedSections; + + // Track the ThunksSections that need to be inserted 
into an OutputSection + std::map<OutputSection *, std::vector<ThunkSection *>> ThunkSections; +}; + +// Return a int64_t to make sure we get the sign extension out of the way as +// early as possible. template <class ELFT> -static inline typename ELFT::uint getAddend(const typename ELFT::Rel &Rel) { +static inline int64_t getAddend(const typename ELFT::Rel &Rel) { return 0; } - template <class ELFT> -static inline typename ELFT::uint getAddend(const typename ELFT::Rela &Rel) { +static inline int64_t getAddend(const typename ELFT::Rela &Rel) { return Rel.r_addend; } } diff --git a/ELF/ScriptLexer.cpp b/ELF/ScriptLexer.cpp new file mode 100644 index 000000000000..86720de3527c --- /dev/null +++ b/ELF/ScriptLexer.cpp @@ -0,0 +1,285 @@ +//===- ScriptLexer.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a lexer for the linker script. +// +// The linker script's grammar is not complex but ambiguous due to the +// lack of the formal specification of the language. What we are trying to +// do in this and other files in LLD is to make a "reasonable" linker +// script processor. +// +// Among simplicity, compatibility and efficiency, we put the most +// emphasis on simplicity when we wrote this lexer. Compatibility with the +// GNU linkers is important, but we did not try to clone every tiny corner +// case of their lexers, as even ld.bfd and ld.gold are subtly different +// in various corner cases. We do not care much about efficiency because +// the time spent in parsing linker scripts is usually negligible. +// +// Our grammar of the linker script is LL(2), meaning that it needs at +// most two-token lookahead to parse. 
The only place we need two-token +// lookahead is labels in version scripts, where we need to parse "local :" +// as if "local:". +// +// Overall, this lexer works fine for most linker scripts. There might +// be room for improving compatibility, but that's probably not at the +// top of our todo list. +// +//===----------------------------------------------------------------------===// + +#include "ScriptLexer.h" +#include "Error.h" +#include "llvm/ADT/Twine.h" + +using namespace llvm; +using namespace lld; +using namespace lld::elf; + +// Returns a whole line containing the current token. +StringRef ScriptLexer::getLine() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + + size_t Pos = S.rfind('\n', Tok.data() - S.data()); + if (Pos != StringRef::npos) + S = S.substr(Pos + 1); + return S.substr(0, S.find_first_of("\r\n")); +} + +// Returns 1-based line number of the current token. +size_t ScriptLexer::getLineNumber() { + StringRef S = getCurrentMB().getBuffer(); + StringRef Tok = Tokens[Pos - 1]; + return S.substr(0, Tok.data() - S.data()).count('\n') + 1; +} + +// Returns 0-based column number of the current token. +size_t ScriptLexer::getColumnNumber() { + StringRef Tok = Tokens[Pos - 1]; + return Tok.data() - getLine().data(); +} + +std::string ScriptLexer::getCurrentLocation() { + std::string Filename = getCurrentMB().getBufferIdentifier(); + if (!Pos) + return Filename; + return (Filename + ":" + Twine(getLineNumber())).str(); +} + +ScriptLexer::ScriptLexer(MemoryBufferRef MB) { tokenize(MB); } + +// We don't want to record cascading errors. Keep only the first one. +void ScriptLexer::setError(const Twine &Msg) { + if (Error) + return; + Error = true; + + if (!Pos) { + error(getCurrentLocation() + ": " + Msg); + return; + } + + std::string S = getCurrentLocation() + ": "; + error(S + Msg); + error(S + getLine()); + error(S + std::string(getColumnNumber(), ' ') + "^"); +} + +// Split S into linker script tokens. 
+void ScriptLexer::tokenize(MemoryBufferRef MB) { + std::vector<StringRef> Vec; + MBs.push_back(MB); + StringRef S = MB.getBuffer(); + StringRef Begin = S; + + for (;;) { + S = skipSpace(S); + if (S.empty()) + break; + + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted + // as glob patterns. Double-quoted tokens are literal patterns in that + // context. + if (S.startswith("\"")) { + size_t E = S.find("\"", 1); + if (E == StringRef::npos) { + StringRef Filename = MB.getBufferIdentifier(); + size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); + error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); + return; + } + + Vec.push_back(S.take_front(E + 1)); + S = S.substr(E + 1); + continue; + } + + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. + size_t Pos = S.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-!<>^:"); + + // A character that cannot start a word (which is usually a + // punctuation) forms a single character token. + if (Pos == 0) + Pos = 1; + Vec.push_back(S.substr(0, Pos)); + S = S.substr(Pos); + } + + Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); +} + +// Skip leading whitespace characters or comments. +StringRef ScriptLexer::skipSpace(StringRef S) { + for (;;) { + if (S.startswith("/*")) { + size_t E = S.find("*/", 2); + if (E == StringRef::npos) { + error("unclosed comment in a linker script"); + return ""; + } + S = S.substr(E + 2); + continue; + } + if (S.startswith("#")) { + size_t E = S.find('\n', 1); + if (E == StringRef::npos) + E = S.size() - 1; + S = S.substr(E + 1); + continue; + } + size_t Size = S.size(); + S = S.ltrim(); + if (S.size() == Size) + return S; + } +} + +// An erroneous token is handled as if it were the last token before EOF. 
+bool ScriptLexer::atEOF() { return Error || Tokens.size() == Pos; } + +// Split a given string as an expression. +// This function returns "3", "*" and "5" for "3*5" for example. +static std::vector<StringRef> tokenizeExpr(StringRef S) { + StringRef Ops = "+-*/:"; // List of operators + + // Quoted strings are literal strings, so we don't want to split it. + if (S.startswith("\"")) + return {S}; + + // Split S with +-*/ as separators. + std::vector<StringRef> Ret; + while (!S.empty()) { + size_t E = S.find_first_of(Ops); + + // No need to split if there is no operator. + if (E == StringRef::npos) { + Ret.push_back(S); + break; + } + + // Get a token before the opreator. + if (E != 0) + Ret.push_back(S.substr(0, E)); + + // Get the operator as a token. + Ret.push_back(S.substr(E, 1)); + S = S.substr(E + 1); + } + return Ret; +} + +// In contexts where expressions are expected, the lexer should apply +// different tokenization rules than the default one. By default, +// arithmetic operator characters are regular characters, but in the +// expression context, they should be independent tokens. +// +// For example, "foo*3" should be tokenized to "foo", "*" and "3" only +// in the expression context. +// +// This function may split the current token into multiple tokens. 
+void ScriptLexer::maybeSplitExpr() { + if (!InExpr || Error || atEOF()) + return; + + std::vector<StringRef> V = tokenizeExpr(Tokens[Pos]); + if (V.size() == 1) + return; + Tokens.erase(Tokens.begin() + Pos); + Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); +} + +StringRef ScriptLexer::next() { + maybeSplitExpr(); + + if (Error) + return ""; + if (atEOF()) { + setError("unexpected EOF"); + return ""; + } + return Tokens[Pos++]; +} + +StringRef ScriptLexer::peek() { + StringRef Tok = next(); + if (Error) + return ""; + Pos = Pos - 1; + return Tok; +} + +bool ScriptLexer::consume(StringRef Tok) { + if (peek() == Tok) { + skip(); + return true; + } + return false; +} + +// Consumes Tok followed by ":". Space is allowed between Tok and ":". +bool ScriptLexer::consumeLabel(StringRef Tok) { + if (consume((Tok + ":").str())) + return true; + if (Tokens.size() >= Pos + 2 && Tokens[Pos] == Tok && + Tokens[Pos + 1] == ":") { + Pos += 2; + return true; + } + return false; +} + +void ScriptLexer::skip() { (void)next(); } + +void ScriptLexer::expect(StringRef Expect) { + if (Error) + return; + StringRef Tok = next(); + if (Tok != Expect) + setError(Expect + " expected, but got " + Tok); +} + +// Returns true if S encloses T. +static bool encloses(StringRef S, StringRef T) { + return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); +} + +MemoryBufferRef ScriptLexer::getCurrentMB() { + // Find input buffer containing the current token. 
+ assert(!MBs.empty()); + if (!Pos) + return MBs[0]; + + for (MemoryBufferRef MB : MBs) + if (encloses(MB.getBuffer(), Tokens[Pos - 1])) + return MB; + llvm_unreachable("getCurrentMB: failed to find a token"); +} diff --git a/ELF/ScriptLexer.h b/ELF/ScriptLexer.h new file mode 100644 index 000000000000..64d6d9204864 --- /dev/null +++ b/ELF/ScriptLexer.h @@ -0,0 +1,56 @@ +//===- ScriptLexer.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_SCRIPT_LEXER_H +#define LLD_ELF_SCRIPT_LEXER_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include <utility> +#include <vector> + +namespace lld { +namespace elf { + +class ScriptLexer { +public: + explicit ScriptLexer(MemoryBufferRef MB); + + void setError(const Twine &Msg); + void tokenize(MemoryBufferRef MB); + static StringRef skipSpace(StringRef S); + bool atEOF(); + StringRef next(); + StringRef peek(); + void skip(); + bool consume(StringRef Tok); + void expect(StringRef Expect); + bool consumeLabel(StringRef Tok); + std::string getCurrentLocation(); + + std::vector<MemoryBufferRef> MBs; + std::vector<StringRef> Tokens; + bool InExpr = false; + size_t Pos = 0; + bool Error = false; + +private: + void maybeSplitExpr(); + StringRef getLine(); + size_t getLineNumber(); + size_t getColumnNumber(); + + MemoryBufferRef getCurrentMB(); +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/ELF/ScriptParser.cpp b/ELF/ScriptParser.cpp index c740685a15a1..032ecd50f3e3 100644 --- a/ELF/ScriptParser.cpp +++ b/ELF/ScriptParser.cpp @@ -7,194 +7,1171 @@ // //===----------------------------------------------------------------------===// // -// This file contains the base parser class for linker script and 
dynamic -// list. +// This file contains a recursive-descendent parser for linker scripts. +// Parsed results are stored to Config and Script global objects. // //===----------------------------------------------------------------------===// #include "ScriptParser.h" -#include "Error.h" -#include "llvm/ADT/Twine.h" +#include "Config.h" +#include "Driver.h" +#include "InputSection.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "ScriptLexer.h" +#include "Symbols.h" +#include "Target.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include <cassert> +#include <limits> +#include <vector> using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -// Returns a whole line containing the current token. 
-StringRef ScriptParserBase::getLine() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; +static bool isUnderSysroot(StringRef Path); - size_t Pos = S.rfind('\n', Tok.data() - S.data()); - if (Pos != StringRef::npos) - S = S.substr(Pos + 1); - return S.substr(0, S.find_first_of("\r\n")); +namespace { +class ScriptParser final : ScriptLexer { +public: + ScriptParser(MemoryBufferRef MB) + : ScriptLexer(MB), + IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} + + void readLinkerScript(); + void readVersionScript(); + void readDynamicList(); + +private: + void addFile(StringRef Path); + + void readAsNeeded(); + void readEntry(); + void readExtern(); + void readGroup(); + void readInclude(); + void readMemory(); + void readOutput(); + void readOutputArch(); + void readOutputFormat(); + void readPhdrs(); + void readSearchDir(); + void readSections(); + void readVersion(); + void readVersionScriptCommand(); + + SymbolAssignment *readAssignment(StringRef Name); + BytesDataCommand *readBytesDataCommand(StringRef Tok); + uint32_t readFill(); + uint32_t parseFill(StringRef Tok); + OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); + std::vector<StringRef> readOutputSectionPhdrs(); + InputSectionDescription *readInputSectionDescription(StringRef Tok); + StringMatcher readFilePatterns(); + std::vector<SectionPattern> readInputSectionsList(); + InputSectionDescription *readInputSectionRules(StringRef FilePattern); + unsigned readPhdrType(); + SortSectionPolicy readSortKind(); + SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); + SymbolAssignment *readProvideOrAssignment(StringRef Tok); + void readSort(); + AssertCommand *readAssert(); + Expr readAssertExpr(); + + uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); + std::pair<uint32_t, uint32_t> readMemoryAttributes(); + + Expr readExpr(); + Expr readExpr1(Expr Lhs, int MinPrec); + StringRef readParenLiteral(); + Expr readPrimary(); + Expr 
readTernary(Expr Cond); + Expr readParenExpr(); + + // For parsing version script. + std::vector<SymbolVersion> readVersionExtern(); + void readAnonymousDeclaration(); + void readVersionDeclaration(StringRef VerStr); + + std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> + readSymbols(); + + bool IsUnderSysroot; +}; +} // namespace + +static bool isUnderSysroot(StringRef Path) { + if (Config->Sysroot == "") + return false; + for (; !Path.empty(); Path = sys::path::parent_path(Path)) + if (sys::fs::equivalent(Config->Sysroot, Path)) + return true; + return false; } -// Returns 1-based line number of the current token. -size_t ScriptParserBase::getLineNumber() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; - return S.substr(0, Tok.data() - S.data()).count('\n') + 1; +// Some operations only support one non absolute value. Move the +// absolute one to the right hand side for convenience. +static void moveAbsRight(ExprValue &A, ExprValue &B) { + if (A.isAbsolute()) + std::swap(A, B); + if (!B.isAbsolute()) + error("At least one side of the expression must be absolute"); } -// Returns 0-based column number of the current token. -size_t ScriptParserBase::getColumnNumber() { - StringRef Tok = Tokens[Pos - 1]; - return Tok.data() - getLine().data(); +static ExprValue add(ExprValue A, ExprValue B) { + moveAbsRight(A, B); + return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; } -std::string ScriptParserBase::getCurrentLocation() { - std::string Filename = getCurrentMB().getBufferIdentifier(); - if (!Pos) - return Filename; - return (Filename + ":" + Twine(getLineNumber())).str(); +static ExprValue sub(ExprValue A, ExprValue B) { + return {A.Sec, A.Val - B.getValue()}; } -ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } +static ExprValue mul(ExprValue A, ExprValue B) { + return A.getValue() * B.getValue(); +} -// We don't want to record cascading errors. Keep only the first one. 
-void ScriptParserBase::setError(const Twine &Msg) { - if (Error) +static ExprValue div(ExprValue A, ExprValue B) { + if (uint64_t BV = B.getValue()) + return A.getValue() / BV; + error("division by zero"); + return 0; +} + +static ExprValue bitAnd(ExprValue A, ExprValue B) { + moveAbsRight(A, B); + return {A.Sec, A.ForceAbsolute, + (A.getValue() & B.getValue()) - A.getSecAddr()}; +} + +static ExprValue bitOr(ExprValue A, ExprValue B) { + moveAbsRight(A, B); + return {A.Sec, A.ForceAbsolute, + (A.getValue() | B.getValue()) - A.getSecAddr()}; +} + +void ScriptParser::readDynamicList() { + expect("{"); + readAnonymousDeclaration(); + if (!atEOF()) + setError("EOF expected, but got " + next()); +} + +void ScriptParser::readVersionScript() { + readVersionScriptCommand(); + if (!atEOF()) + setError("EOF expected, but got " + next()); +} + +void ScriptParser::readVersionScriptCommand() { + if (consume("{")) { + readAnonymousDeclaration(); return; - Error = true; + } + + while (!atEOF() && !Error && peek() != "}") { + StringRef VerStr = next(); + if (VerStr == "{") { + setError("anonymous version definition is used in " + "combination with other version definitions"); + return; + } + expect("{"); + readVersionDeclaration(VerStr); + } +} + +void ScriptParser::readVersion() { + expect("{"); + readVersionScriptCommand(); + expect("}"); +} - if (!Pos) { - error(getCurrentLocation() + ": " + Msg); +void ScriptParser::readLinkerScript() { + while (!atEOF()) { + StringRef Tok = next(); + if (Tok == ";") + continue; + + if (Tok == "ASSERT") { + Script->Opt.Commands.push_back(readAssert()); + } else if (Tok == "ENTRY") { + readEntry(); + } else if (Tok == "EXTERN") { + readExtern(); + } else if (Tok == "GROUP" || Tok == "INPUT") { + readGroup(); + } else if (Tok == "INCLUDE") { + readInclude(); + } else if (Tok == "MEMORY") { + readMemory(); + } else if (Tok == "OUTPUT") { + readOutput(); + } else if (Tok == "OUTPUT_ARCH") { + readOutputArch(); + } else if (Tok == "OUTPUT_FORMAT") 
{ + readOutputFormat(); + } else if (Tok == "PHDRS") { + readPhdrs(); + } else if (Tok == "SEARCH_DIR") { + readSearchDir(); + } else if (Tok == "SECTIONS") { + readSections(); + } else if (Tok == "VERSION") { + readVersion(); + } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { + Script->Opt.Commands.push_back(Cmd); + } else { + setError("unknown directive: " + Tok); + } + } +} + +void ScriptParser::addFile(StringRef S) { + if (IsUnderSysroot && S.startswith("/")) { + SmallString<128> PathData; + StringRef Path = (Config->Sysroot + S).toStringRef(PathData); + if (sys::fs::exists(Path)) { + Driver->addFile(Saver.save(Path), /*WithLOption=*/false); + return; + } + } + + if (sys::path::is_absolute(S)) { + Driver->addFile(S, /*WithLOption=*/false); + } else if (S.startswith("=")) { + if (Config->Sysroot.empty()) + Driver->addFile(S.substr(1), /*WithLOption=*/false); + else + Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), + /*WithLOption=*/false); + } else if (S.startswith("-l")) { + Driver->addLibrary(S.substr(2)); + } else if (sys::fs::exists(S)) { + Driver->addFile(S, /*WithLOption=*/false); + } else { + if (Optional<std::string> Path = findFromSearchPaths(S)) + Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); + else + setError("unable to find " + S); + } +} + +void ScriptParser::readAsNeeded() { + expect("("); + bool Orig = Config->AsNeeded; + Config->AsNeeded = true; + while (!Error && !consume(")")) + addFile(unquote(next())); + Config->AsNeeded = Orig; +} + +void ScriptParser::readEntry() { + // -e <symbol> takes predecence over ENTRY(<symbol>). 
+ expect("("); + StringRef Tok = next(); + if (Config->Entry.empty()) + Config->Entry = Tok; + expect(")"); +} + +void ScriptParser::readExtern() { + expect("("); + while (!Error && !consume(")")) + Config->Undefined.push_back(next()); +} + +void ScriptParser::readGroup() { + expect("("); + while (!Error && !consume(")")) { + if (consume("AS_NEEDED")) + readAsNeeded(); + else + addFile(unquote(next())); + } +} + +void ScriptParser::readInclude() { + StringRef Tok = unquote(next()); + + // https://sourceware.org/binutils/docs/ld/File-Commands.html: + // The file will be searched for in the current directory, and in any + // directory specified with the -L option. + if (sys::fs::exists(Tok)) { + if (Optional<MemoryBufferRef> MB = readFile(Tok)) + tokenize(*MB); + return; + } + if (Optional<std::string> Path = findFromSearchPaths(Tok)) { + if (Optional<MemoryBufferRef> MB = readFile(*Path)) + tokenize(*MB); return; } + setError("cannot open " + Tok); +} + +void ScriptParser::readOutput() { + // -o <file> takes predecence over OUTPUT(<file>). + expect("("); + StringRef Tok = next(); + if (Config->OutputFile.empty()) + Config->OutputFile = unquote(Tok); + expect(")"); +} - std::string S = getCurrentLocation() + ": "; - error(S + Msg); - error(S + getLine()); - error(S + std::string(getColumnNumber(), ' ') + "^"); +void ScriptParser::readOutputArch() { + // OUTPUT_ARCH is ignored for now. + expect("("); + while (!Error && !consume(")")) + skip(); } -// Split S into linker script tokens. -void ScriptParserBase::tokenize(MemoryBufferRef MB) { - std::vector<StringRef> Vec; - MBs.push_back(MB); - StringRef S = MB.getBuffer(); - StringRef Begin = S; +void ScriptParser::readOutputFormat() { + // Error checking only for now. 
+ expect("("); + skip(); + if (consume(")")) + return; + expect(","); + skip(); + expect(","); + skip(); + expect(")"); +} - for (;;) { - S = skipSpace(S); - if (S.empty()) - break; +void ScriptParser::readPhdrs() { + expect("{"); + while (!Error && !consume("}")) { + Script->Opt.PhdrsCommands.push_back( + {next(), PT_NULL, false, false, UINT_MAX, nullptr}); - // Quoted token. Note that double-quote characters are parts of a token - // because, in a glob match context, only unquoted tokens are interpreted - // as glob patterns. Double-quoted tokens are literal patterns in that - // context. - if (S.startswith("\"")) { - size_t E = S.find("\"", 1); - if (E == StringRef::npos) { - StringRef Filename = MB.getBufferIdentifier(); - size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); - error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); - return; - } + PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); + PhdrCmd.Type = readPhdrType(); - Vec.push_back(S.take_front(E + 1)); - S = S.substr(E + 1); - continue; + while (!Error && !consume(";")) { + if (consume("FILEHDR")) + PhdrCmd.HasFilehdr = true; + else if (consume("PHDRS")) + PhdrCmd.HasPhdrs = true; + else if (consume("AT")) + PhdrCmd.LMAExpr = readParenExpr(); + else if (consume("FLAGS")) + PhdrCmd.Flags = readParenExpr()().getValue(); + else + setError("unexpected header attribute: " + next()); } + } +} - // Unquoted token. This is more relaxed than tokens in C-like language, - // so that you can write "file-name.cpp" as one bare token, for example. 
- size_t Pos = S.find_first_not_of( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-:!<>^"); +void ScriptParser::readSearchDir() { + expect("("); + StringRef Tok = next(); + if (!Config->Nostdlib) + Config->SearchPaths.push_back(unquote(Tok)); + expect(")"); +} + +void ScriptParser::readSections() { + Script->Opt.HasSections = true; + + // -no-rosegment is used to avoid placing read only non-executable sections in + // their own segment. We do the same if SECTIONS command is present in linker + // script. See comment for computeFlags(). + Config->SingleRoRx = true; - // A character that cannot start a word (which is usually a - // punctuation) forms a single character token. - if (Pos == 0) - Pos = 1; - Vec.push_back(S.substr(0, Pos)); - S = S.substr(Pos); + expect("{"); + while (!Error && !consume("}")) { + StringRef Tok = next(); + BaseCommand *Cmd = readProvideOrAssignment(Tok); + if (!Cmd) { + if (Tok == "ASSERT") + Cmd = readAssert(); + else + Cmd = readOutputSectionDescription(Tok); + } + Script->Opt.Commands.push_back(Cmd); } +} - Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); +static int precedence(StringRef Op) { + return StringSwitch<int>(Op) + .Cases("*", "/", 5) + .Cases("+", "-", 4) + .Cases("<<", ">>", 3) + .Cases("<", "<=", ">", ">=", "==", "!=", 2) + .Cases("&", "|", 1) + .Default(-1); } -// Skip leading whitespace characters or comments. 
-StringRef ScriptParserBase::skipSpace(StringRef S) { - for (;;) { - if (S.startswith("/*")) { - size_t E = S.find("*/", 2); - if (E == StringRef::npos) { - error("unclosed comment in a linker script"); - return ""; +StringMatcher ScriptParser::readFilePatterns() { + std::vector<StringRef> V; + while (!Error && !consume(")")) + V.push_back(next()); + return StringMatcher(V); +} + +SortSectionPolicy ScriptParser::readSortKind() { + if (consume("SORT") || consume("SORT_BY_NAME")) + return SortSectionPolicy::Name; + if (consume("SORT_BY_ALIGNMENT")) + return SortSectionPolicy::Alignment; + if (consume("SORT_BY_INIT_PRIORITY")) + return SortSectionPolicy::Priority; + if (consume("SORT_NONE")) + return SortSectionPolicy::None; + return SortSectionPolicy::Default; +} + +// Reads SECTIONS command contents in the following form: +// +// <contents> ::= <elem>* +// <elem> ::= <exclude>? <glob-pattern> +// <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" +// +// For example, +// +// *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) +// +// is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". +// The semantics of that is section .foo in any file, section .bar in +// any file but a.o, and section .baz in any file but b.o. +std::vector<SectionPattern> ScriptParser::readInputSectionsList() { + std::vector<SectionPattern> Ret; + while (!Error && peek() != ")") { + StringMatcher ExcludeFilePat; + if (consume("EXCLUDE_FILE")) { + expect("("); + ExcludeFilePat = readFilePatterns(); + } + + std::vector<StringRef> V; + while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") + V.push_back(next()); + + if (!V.empty()) + Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); + else + setError("section pattern is expected"); + } + return Ret; +} + +// Reads contents of "SECTIONS" directive. That directive contains a +// list of glob patterns for input sections. The grammar is as follows. 
+// +// <patterns> ::= <section-list> +// | <sort> "(" <section-list> ")" +// | <sort> "(" <sort> "(" <section-list> ")" ")" +// +// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" +// | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" +// +// <section-list> is parsed by readInputSectionsList(). +InputSectionDescription * +ScriptParser::readInputSectionRules(StringRef FilePattern) { + auto *Cmd = make<InputSectionDescription>(FilePattern); + expect("("); + + while (!Error && !consume(")")) { + SortSectionPolicy Outer = readSortKind(); + SortSectionPolicy Inner = SortSectionPolicy::Default; + std::vector<SectionPattern> V; + if (Outer != SortSectionPolicy::Default) { + expect("("); + Inner = readSortKind(); + if (Inner != SortSectionPolicy::Default) { + expect("("); + V = readInputSectionsList(); + expect(")"); + } else { + V = readInputSectionsList(); } - S = S.substr(E + 2); - continue; + expect(")"); + } else { + V = readInputSectionsList(); } - if (S.startswith("#")) { - size_t E = S.find('\n', 1); - if (E == StringRef::npos) - E = S.size() - 1; - S = S.substr(E + 1); - continue; + + for (SectionPattern &Pat : V) { + Pat.SortInner = Inner; + Pat.SortOuter = Outer; } - size_t Size = S.size(); - S = S.ltrim(); - if (S.size() == Size) - return S; + + std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); } + return Cmd; } -// An erroneous token is handled as if it were the last token before EOF. -bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } +InputSectionDescription * +ScriptParser::readInputSectionDescription(StringRef Tok) { + // Input section wildcard can be surrounded by KEEP. 
+ // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep + if (Tok == "KEEP") { + expect("("); + StringRef FilePattern = next(); + InputSectionDescription *Cmd = readInputSectionRules(FilePattern); + expect(")"); + Script->Opt.KeptSections.push_back(Cmd); + return Cmd; + } + return readInputSectionRules(Tok); +} + +void ScriptParser::readSort() { + expect("("); + expect("CONSTRUCTORS"); + expect(")"); +} -StringRef ScriptParserBase::next() { - if (Error) - return ""; - if (atEOF()) { - setError("unexpected EOF"); - return ""; +AssertCommand *ScriptParser::readAssert() { + return make<AssertCommand>(readAssertExpr()); +} + +Expr ScriptParser::readAssertExpr() { + expect("("); + Expr E = readExpr(); + expect(","); + StringRef Msg = unquote(next()); + expect(")"); + + return [=] { + if (!E().getValue()) + error(Msg); + return Script->getDot(); + }; +} + +// Reads a FILL(expr) command. We handle the FILL command as an +// alias for =fillexp section attribute, which is different from +// what GNU linkers do. +// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html +uint32_t ScriptParser::readFill() { + expect("("); + uint32_t V = parseFill(next()); + expect(")"); + return V; +} + +OutputSectionCommand * +ScriptParser::readOutputSectionDescription(StringRef OutSec) { + OutputSectionCommand *Cmd = make<OutputSectionCommand>(OutSec); + Cmd->Location = getCurrentLocation(); + + // Read an address expression. + // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html + if (peek() != ":") + Cmd->AddrExpr = readExpr(); + + expect(":"); + + if (consume("AT")) + Cmd->LMAExpr = readParenExpr(); + if (consume("ALIGN")) + Cmd->AlignExpr = readParenExpr(); + if (consume("SUBALIGN")) + Cmd->SubalignExpr = readParenExpr(); + + // Parse constraints. 
+ if (consume("ONLY_IF_RO")) + Cmd->Constraint = ConstraintKind::ReadOnly; + if (consume("ONLY_IF_RW")) + Cmd->Constraint = ConstraintKind::ReadWrite; + expect("{"); + + while (!Error && !consume("}")) { + StringRef Tok = next(); + if (Tok == ";") { + // Empty commands are allowed. Do nothing here. + } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { + Cmd->Commands.push_back(Assign); + } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { + Cmd->Commands.push_back(Data); + } else if (Tok == "ASSERT") { + Cmd->Commands.push_back(readAssert()); + expect(";"); + } else if (Tok == "CONSTRUCTORS") { + // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors + // by name. This is for very old file formats such as ECOFF/XCOFF. + // For ELF, we should ignore. + } else if (Tok == "FILL") { + Cmd->Filler = readFill(); + } else if (Tok == "SORT") { + readSort(); + } else if (peek() == "(") { + Cmd->Commands.push_back(readInputSectionDescription(Tok)); + } else { + setError("unknown command " + Tok); + } } - return Tokens[Pos++]; + + if (consume(">")) + Cmd->MemoryRegionName = next(); + + Cmd->Phdrs = readOutputSectionPhdrs(); + + if (consume("=")) + Cmd->Filler = parseFill(next()); + else if (peek().startswith("=")) + Cmd->Filler = parseFill(next().drop_front()); + + // Consume optional comma following output section command. + consume(","); + + return Cmd; } -StringRef ScriptParserBase::peek() { +// Parses a given string as a octal/decimal/hexadecimal number and +// returns it as a big-endian number. Used for `=<fillexp>`. +// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html +// +// When reading a hexstring, ld.bfd handles it as a blob of arbitrary +// size, while ld.gold always handles it as a 32-bit big-endian number. +// We are compatible with ld.gold because it's easier to implement. 
+uint32_t ScriptParser::parseFill(StringRef Tok) { + uint32_t V = 0; + if (Tok.getAsInteger(0, V)) + setError("invalid filler expression: " + Tok); + + uint32_t Buf; + write32be(&Buf, V); + return Buf; +} + +SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { + expect("("); + SymbolAssignment *Cmd = readAssignment(next()); + Cmd->Provide = Provide; + Cmd->Hidden = Hidden; + expect(")"); + expect(";"); + return Cmd; +} + +SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { + SymbolAssignment *Cmd = nullptr; + if (peek() == "=" || peek() == "+=") { + Cmd = readAssignment(Tok); + expect(";"); + } else if (Tok == "PROVIDE") { + Cmd = readProvideHidden(true, false); + } else if (Tok == "HIDDEN") { + Cmd = readProvideHidden(false, true); + } else if (Tok == "PROVIDE_HIDDEN") { + Cmd = readProvideHidden(true, true); + } + return Cmd; +} + +SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { + StringRef Op = next(); + assert(Op == "=" || Op == "+="); + Expr E = readExpr(); + if (Op == "+=") { + std::string Loc = getCurrentLocation(); + E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; + } + return make<SymbolAssignment>(Name, E, getCurrentLocation()); +} + +// This is an operator-precedence parser to parse a linker +// script expression. +Expr ScriptParser::readExpr() { + // Our lexer is context-aware. Set the in-expression bit so that + // they apply different tokenization rules. 
+ bool Orig = InExpr; + InExpr = true; + Expr E = readExpr1(readPrimary(), 0); + InExpr = Orig; + return E; +} + +static Expr combine(StringRef Op, Expr L, Expr R) { + if (Op == "+") + return [=] { return add(L(), R()); }; + if (Op == "-") + return [=] { return sub(L(), R()); }; + if (Op == "*") + return [=] { return mul(L(), R()); }; + if (Op == "/") + return [=] { return div(L(), R()); }; + if (Op == "<<") + return [=] { return L().getValue() << R().getValue(); }; + if (Op == ">>") + return [=] { return L().getValue() >> R().getValue(); }; + if (Op == "<") + return [=] { return L().getValue() < R().getValue(); }; + if (Op == ">") + return [=] { return L().getValue() > R().getValue(); }; + if (Op == ">=") + return [=] { return L().getValue() >= R().getValue(); }; + if (Op == "<=") + return [=] { return L().getValue() <= R().getValue(); }; + if (Op == "==") + return [=] { return L().getValue() == R().getValue(); }; + if (Op == "!=") + return [=] { return L().getValue() != R().getValue(); }; + if (Op == "&") + return [=] { return bitAnd(L(), R()); }; + if (Op == "|") + return [=] { return bitOr(L(), R()); }; + llvm_unreachable("invalid operator"); +} + +// This is a part of the operator-precedence parser. This function +// assumes that the remaining token stream starts with an operator. +Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { + while (!atEOF() && !Error) { + // Read an operator and an expression. + if (consume("?")) + return readTernary(Lhs); + StringRef Op1 = peek(); + if (precedence(Op1) < MinPrec) + break; + skip(); + Expr Rhs = readPrimary(); + + // Evaluate the remaining part of the expression first if the + // next operator has greater precedence than the previous one. + // For example, if we have read "+" and "3", and if the next + // operator is "*", then we'll evaluate 3 * ... part first. 
+ while (!atEOF()) { + StringRef Op2 = peek(); + if (precedence(Op2) <= precedence(Op1)) + break; + Rhs = readExpr1(Rhs, precedence(Op2)); + } + + Lhs = combine(Op1, Lhs, Rhs); + } + return Lhs; +} + +uint64_t static getConstant(StringRef S) { + if (S == "COMMONPAGESIZE") + return Target->PageSize; + if (S == "MAXPAGESIZE") + return Config->MaxPageSize; + error("unknown constant: " + S); + return 0; +} + +// Parses Tok as an integer. It recognizes hexadecimal (prefixed with +// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may +// have "K" (Ki) or "M" (Mi) suffixes. +static Optional<uint64_t> parseInt(StringRef Tok) { + // Negative number + if (Tok.startswith("-")) { + if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) + return -*Val; + return None; + } + + // Hexadecimal + uint64_t Val; + if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val)) + return Val; + if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val)) + return Val; + + // Decimal + if (Tok.endswith_lower("K")) { + if (Tok.drop_back().getAsInteger(10, Val)) + return None; + return Val * 1024; + } + if (Tok.endswith_lower("M")) { + if (Tok.drop_back().getAsInteger(10, Val)) + return None; + return Val * 1024 * 1024; + } + if (Tok.getAsInteger(10, Val)) + return None; + return Val; +} + +BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { + int Size = StringSwitch<int>(Tok) + .Case("BYTE", 1) + .Case("SHORT", 2) + .Case("LONG", 4) + .Case("QUAD", 8) + .Default(-1); + if (Size == -1) + return nullptr; + + return make<BytesDataCommand>(readParenExpr(), Size); +} + +StringRef ScriptParser::readParenLiteral() { + expect("("); StringRef Tok = next(); - if (Error) - return ""; - --Pos; + expect(")"); return Tok; } -bool ScriptParserBase::consume(StringRef Tok) { - if (peek() == Tok) { +Expr ScriptParser::readPrimary() { + if (peek() == "(") + return readParenExpr(); + + if (consume("~")) { + Expr E = readPrimary(); + return [=] { return 
~E().getValue(); }; + } + if (consume("-")) { + Expr E = readPrimary(); + return [=] { return -E().getValue(); }; + } + + StringRef Tok = next(); + std::string Location = getCurrentLocation(); + + // Built-in functions are parsed here. + // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. + if (Tok == "ABSOLUTE") { + Expr Inner = readParenExpr(); + return [=] { + ExprValue I = Inner(); + I.ForceAbsolute = true; + return I; + }; + } + if (Tok == "ADDR") { + StringRef Name = readParenLiteral(); + return [=]() -> ExprValue { + return {Script->getOutputSection(Location, Name), 0}; + }; + } + if (Tok == "ALIGN") { + expect("("); + Expr E = readExpr(); + if (consume(")")) + return [=] { return alignTo(Script->getDot(), E().getValue()); }; + expect(","); + Expr E2 = readExpr(); + expect(")"); + return [=] { return alignTo(E().getValue(), E2().getValue()); }; + } + if (Tok == "ALIGNOF") { + StringRef Name = readParenLiteral(); + return [=] { return Script->getOutputSection(Location, Name)->Alignment; }; + } + if (Tok == "ASSERT") + return readAssertExpr(); + if (Tok == "CONSTANT") { + StringRef Name = readParenLiteral(); + return [=] { return getConstant(Name); }; + } + if (Tok == "DATA_SEGMENT_ALIGN") { + expect("("); + Expr E = readExpr(); + expect(","); + readExpr(); + expect(")"); + return [=] { return alignTo(Script->getDot(), E().getValue()); }; + } + if (Tok == "DATA_SEGMENT_END") { + expect("("); + expect("."); + expect(")"); + return [] { return Script->getDot(); }; + } + if (Tok == "DATA_SEGMENT_RELRO_END") { + // GNU linkers implements more complicated logic to handle + // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and + // just align to the next page boundary for simplicity. + expect("("); + readExpr(); + expect(","); + readExpr(); + expect(")"); + return [] { return alignTo(Script->getDot(), Target->PageSize); }; + } + if (Tok == "DEFINED") { + StringRef Name = readParenLiteral(); + return [=] { return Script->isDefined(Name) ? 
1 : 0; }; + } + if (Tok == "LOADADDR") { + StringRef Name = readParenLiteral(); + return [=] { return Script->getOutputSection(Location, Name)->getLMA(); }; + } + if (Tok == "SEGMENT_START") { + expect("("); skip(); - return true; + expect(","); + Expr E = readExpr(); + expect(")"); + return [=] { return E(); }; } - return false; + if (Tok == "SIZEOF") { + StringRef Name = readParenLiteral(); + return [=] { return Script->getOutputSectionSize(Name); }; + } + if (Tok == "SIZEOF_HEADERS") + return [=] { return elf::getHeaderSize(); }; + + // Tok is the dot. + if (Tok == ".") + return [=] { return Script->getSymbolValue(Location, Tok); }; + + // Tok is a literal number. + if (Optional<uint64_t> Val = parseInt(Tok)) + return [=] { return *Val; }; + + // Tok is a symbol name. + if (!isValidCIdentifier(Tok)) + setError("malformed number: " + Tok); + Script->Opt.ReferencedSymbols.push_back(Tok); + return [=] { return Script->getSymbolValue(Location, Tok); }; } -void ScriptParserBase::skip() { (void)next(); } +Expr ScriptParser::readTernary(Expr Cond) { + Expr L = readExpr(); + expect(":"); + Expr R = readExpr(); + return [=] { return Cond().getValue() ? L() : R(); }; +} -void ScriptParserBase::expect(StringRef Expect) { - if (Error) - return; +Expr ScriptParser::readParenExpr() { + expect("("); + Expr E = readExpr(); + expect(")"); + return E; +} + +std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { + std::vector<StringRef> Phdrs; + while (!Error && peek().startswith(":")) { + StringRef Tok = next(); + Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); + } + return Phdrs; +} + +// Read a program header type name. The next token must be a +// name of a program header type or a constant (e.g. "0x3"). 
+unsigned ScriptParser::readPhdrType() { StringRef Tok = next(); - if (Tok != Expect) - setError(Expect + " expected, but got " + Tok); + if (Optional<uint64_t> Val = parseInt(Tok)) + return *Val; + + unsigned Ret = StringSwitch<unsigned>(Tok) + .Case("PT_NULL", PT_NULL) + .Case("PT_LOAD", PT_LOAD) + .Case("PT_DYNAMIC", PT_DYNAMIC) + .Case("PT_INTERP", PT_INTERP) + .Case("PT_NOTE", PT_NOTE) + .Case("PT_SHLIB", PT_SHLIB) + .Case("PT_PHDR", PT_PHDR) + .Case("PT_TLS", PT_TLS) + .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) + .Case("PT_GNU_STACK", PT_GNU_STACK) + .Case("PT_GNU_RELRO", PT_GNU_RELRO) + .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) + .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) + .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) + .Default(-1); + + if (Ret == (unsigned)-1) { + setError("invalid program header type: " + Tok); + return PT_NULL; + } + return Ret; } -// Returns true if S encloses T. -static bool encloses(StringRef S, StringRef T) { - return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); +// Reads an anonymous version declaration. +void ScriptParser::readAnonymousDeclaration() { + std::vector<SymbolVersion> Locals; + std::vector<SymbolVersion> Globals; + std::tie(Locals, Globals) = readSymbols(); + + for (SymbolVersion V : Locals) { + if (V.Name == "*") + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + else + Config->VersionScriptLocals.push_back(V); + } + + for (SymbolVersion V : Globals) + Config->VersionScriptGlobals.push_back(V); + + expect(";"); } -MemoryBufferRef ScriptParserBase::getCurrentMB() { - // Find input buffer containing the current token. - assert(!MBs.empty()); - if (!Pos) - return MBs[0]; +// Reads a non-anonymous version definition, +// e.g. "VerStr { global: foo; bar; local: *; };". +void ScriptParser::readVersionDeclaration(StringRef VerStr) { + // Read a symbol list. 
+ std::vector<SymbolVersion> Locals; + std::vector<SymbolVersion> Globals; + std::tie(Locals, Globals) = readSymbols(); + + for (SymbolVersion V : Locals) { + if (V.Name == "*") + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + else + Config->VersionScriptLocals.push_back(V); + } + + // Create a new version definition and add that to the global symbols. + VersionDefinition Ver; + Ver.Name = VerStr; + Ver.Globals = Globals; + + // User-defined version number starts from 2 because 0 and 1 are + // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. + Ver.Id = Config->VersionDefinitions.size() + 2; + Config->VersionDefinitions.push_back(Ver); + + // Each version may have a parent version. For example, "Ver2" + // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" + // as a parent. This version hierarchy is, probably against your + // instinct, purely for hint; the runtime doesn't care about it + // at all. In LLD, we simply ignore it. + if (peek() != ";") + skip(); + expect(";"); +} + +// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 
+std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> +ScriptParser::readSymbols() { + std::vector<SymbolVersion> Locals; + std::vector<SymbolVersion> Globals; + std::vector<SymbolVersion> *V = &Globals; + + while (!Error) { + if (consume("}")) + break; + if (consumeLabel("local")) { + V = &Locals; + continue; + } + if (consumeLabel("global")) { + V = &Globals; + continue; + } + + if (consume("extern")) { + std::vector<SymbolVersion> Ext = readVersionExtern(); + V->insert(V->end(), Ext.begin(), Ext.end()); + } else { + StringRef Tok = next(); + V->push_back({unquote(Tok), false, hasWildcard(Tok)}); + } + expect(";"); + } + return {Locals, Globals}; +} + +// Reads an "extern C++" directive, e.g., +// "extern "C++" { ns::*; "f(int, double)"; };" +std::vector<SymbolVersion> ScriptParser::readVersionExtern() { + StringRef Tok = next(); + bool IsCXX = Tok == "\"C++\""; + if (!IsCXX && Tok != "\"C\"") + setError("Unknown language"); + expect("{"); + + std::vector<SymbolVersion> Ret; + while (!Error && peek() != "}") { + StringRef Tok = next(); + bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); + Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); + expect(";"); + } + + expect("}"); + return Ret; +} + +uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, + StringRef S3) { + if (!consume(S1) && !consume(S2) && !consume(S3)) { + setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); + return 0; + } + expect("="); + return readExpr()().getValue(); +} + +// Parse the MEMORY command as specified in: +// https://sourceware.org/binutils/docs/ld/MEMORY.html +// +// MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... 
} +void ScriptParser::readMemory() { + expect("{"); + while (!Error && !consume("}")) { + StringRef Name = next(); + + uint32_t Flags = 0; + uint32_t NegFlags = 0; + if (consume("(")) { + std::tie(Flags, NegFlags) = readMemoryAttributes(); + expect(")"); + } + expect(":"); + + uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); + expect(","); + uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); + + // Add the memory region to the region map (if it doesn't already exist). + auto It = Script->Opt.MemoryRegions.find(Name); + if (It != Script->Opt.MemoryRegions.end()) + setError("region '" + Name + "' already defined"); + else + Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, + Origin, Flags, NegFlags}; + } +} + +// This function parses the attributes used to match against section +// flags when placing output sections in a memory region. These flags +// are only used when an explicit memory region name is not used. +std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { + uint32_t Flags = 0; + uint32_t NegFlags = 0; + bool Invert = false; + + for (char C : next().lower()) { + uint32_t Flag = 0; + if (C == '!') + Invert = !Invert; + else if (C == 'w') + Flag = SHF_WRITE; + else if (C == 'x') + Flag = SHF_EXECINSTR; + else if (C == 'a') + Flag = SHF_ALLOC; + else if (C != 'r') + setError("invalid memory region attribute"); + + if (Invert) + NegFlags |= Flag; + else + Flags |= Flag; + } + return {Flags, NegFlags}; +} + +void elf::readLinkerScript(MemoryBufferRef MB) { + ScriptParser(MB).readLinkerScript(); +} + +void elf::readVersionScript(MemoryBufferRef MB) { + ScriptParser(MB).readVersionScript(); +} - for (MemoryBufferRef MB : MBs) - if (encloses(MB.getBuffer(), Tokens[Pos - 1])) - return MB; - llvm_unreachable("getCurrentMB: failed to find a token"); +void elf::readDynamicList(MemoryBufferRef MB) { + ScriptParser(MB).readDynamicList(); } diff --git a/ELF/ScriptParser.h b/ELF/ScriptParser.h index 264c49792337..02f3a2bd9d2c 
100644 --- a/ELF/ScriptParser.h +++ b/ELF/ScriptParser.h @@ -11,41 +11,19 @@ #define LLD_ELF_SCRIPT_PARSER_H #include "lld/Core/LLVM.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" -#include <utility> -#include <vector> namespace lld { namespace elf { -class ScriptParserBase { -public: - explicit ScriptParserBase(MemoryBufferRef MB); - - void setError(const Twine &Msg); - void tokenize(MemoryBufferRef MB); - static StringRef skipSpace(StringRef S); - bool atEOF(); - StringRef next(); - StringRef peek(); - void skip(); - bool consume(StringRef Tok); - void expect(StringRef Expect); - std::string getCurrentLocation(); - - std::vector<MemoryBufferRef> MBs; - std::vector<StringRef> Tokens; - size_t Pos = 0; - bool Error = false; - -private: - StringRef getLine(); - size_t getLineNumber(); - size_t getColumnNumber(); - - MemoryBufferRef getCurrentMB(); -}; +// Parses a linker script. Calling this function updates +// Config and ScriptConfig. +void readLinkerScript(MemoryBufferRef MB); + +// Parses a version script. +void readVersionScript(MemoryBufferRef MB); + +void readDynamicList(MemoryBufferRef MB); } // namespace elf } // namespace lld diff --git a/ELF/Strings.cpp b/ELF/Strings.cpp index ec3d1f1b2b51..29760b492ba9 100644 --- a/ELF/Strings.cpp +++ b/ELF/Strings.cpp @@ -91,9 +91,9 @@ bool elf::isValidCIdentifier(StringRef S) { // Returns the demangled C++ symbol name for Name. Optional<std::string> elf::demangle(StringRef Name) { - // __cxa_demangle can be used to demangle strings other than symbol + // itaniumDemangle can be used to demangle strings other than symbol // names which do not necessarily start with "_Z". Name can be - // either a C or C++ symbol. Don't call __cxa_demangle if the name + // either a C or C++ symbol. Don't call itaniumDemangle if the name // does not look like a C++ symbol name to avoid getting unexpected // result for a C symbol that happens to match a mangled type name. 
if (!Name.startswith("_Z")) diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp index ce257933c267..42b4fdc26faf 100644 --- a/ELF/SymbolTable.cpp +++ b/ELF/SymbolTable.cpp @@ -75,7 +75,7 @@ template <class ELFT> void SymbolTable<ELFT>::addFile(InputFile *File) { } if (Config->Trace) - outs() << toString(File) << "\n"; + message(toString(File)); // .so file if (auto *F = dyn_cast<SharedFile<ELFT>>(File)) { @@ -115,7 +115,7 @@ template <class ELFT> void SymbolTable<ELFT>::addCombinedLTOObject() { // Compile bitcode files and replace bitcode symbols. LTO.reset(new BitcodeCompiler); for (BitcodeFile *F : BitcodeFiles) - LTO->add<ELFT>(*F); + LTO->add(*F); for (InputFile *File : LTO->compile()) { ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(File); @@ -126,19 +126,19 @@ template <class ELFT> void SymbolTable<ELFT>::addCombinedLTOObject() { } template <class ELFT> -DefinedRegular<ELFT> *SymbolTable<ELFT>::addAbsolute(StringRef Name, - uint8_t Visibility, - uint8_t Binding) { +DefinedRegular *SymbolTable<ELFT>::addAbsolute(StringRef Name, + uint8_t Visibility, + uint8_t Binding) { Symbol *Sym = addRegular(Name, Visibility, STT_NOTYPE, 0, 0, Binding, nullptr, nullptr); - return cast<DefinedRegular<ELFT>>(Sym->body()); + return cast<DefinedRegular>(Sym->body()); } // Add Name as an "ignored" symbol. An ignored symbol is a regular // linker-synthesized defined symbol, but is only defined if needed. 
template <class ELFT> -DefinedRegular<ELFT> *SymbolTable<ELFT>::addIgnored(StringRef Name, - uint8_t Visibility) { +DefinedRegular *SymbolTable<ELFT>::addIgnored(StringRef Name, + uint8_t Visibility) { SymbolBody *S = find(Name); if (!S || S->isInCurrentDSO()) return nullptr; @@ -191,7 +191,7 @@ std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) { Symbol *Sym; if (IsNew) { - Sym = new (BAlloc) Symbol; + Sym = make<Symbol>(); Sym->InVersionScript = false; Sym->Binding = STB_WEAK; Sym->Visibility = STV_DEFAULT; @@ -206,13 +206,6 @@ std::pair<Symbol *, bool> SymbolTable<ELFT>::insert(StringRef Name) { return {Sym, IsNew}; } -// Construct a string in the form of "Sym in File1 and File2". -// Used to construct an error message. -static std::string conflictMsg(SymbolBody *Existing, InputFile *NewFile) { - return "'" + toString(*Existing) + "' in " + toString(Existing->File) + - " and " + toString(NewFile); -} - // Find an existing symbol or create and insert a new one, then apply the given // attributes. template <class ELFT> @@ -226,13 +219,19 @@ SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility, // Merge in the new symbol's visibility. 
S->Visibility = getMinVisibility(S->Visibility, Visibility); + if (!CanOmitFromDynSym && (Config->Shared || Config->ExportDynamic)) S->ExportDynamic = true; + if (IsUsedInRegularObj) S->IsUsedInRegularObj = true; + if (!WasInserted && S->body()->Type != SymbolBody::UnknownType && - ((Type == STT_TLS) != S->body()->isTls())) - error("TLS attribute mismatch for symbol " + conflictMsg(S->body(), File)); + ((Type == STT_TLS) != S->body()->isTls())) { + error("TLS attribute mismatch: " + toString(*S->body()) + + "\n>>> defined in " + toString(S->body()->File) + + "\n>>> defined in " + toString(File)); + } return {S, WasInserted}; } @@ -252,18 +251,22 @@ Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal, InputFile *File) { Symbol *S; bool WasInserted; + uint8_t Visibility = getVisibility(StOther); std::tie(S, WasInserted) = - insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, File); - if (WasInserted) { + insert(Name, Type, Visibility, CanOmitFromDynSym, File); + // An undefined symbol with non default visibility must be satisfied + // in the same DSO. 
+ if (WasInserted || + (isa<SharedSymbol>(S->body()) && Visibility != STV_DEFAULT)) { S->Binding = Binding; - replaceBody<Undefined<ELFT>>(S, Name, IsLocal, StOther, Type, File); + replaceBody<Undefined>(S, Name, IsLocal, StOther, Type, File); return S; } if (Binding != STB_WEAK) { if (S->body()->isShared() || S->body()->isLazy()) S->Binding = Binding; - if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(S->body())) - SS->file()->IsUsed = true; + if (auto *SS = dyn_cast<SharedSymbol>(S->body())) + cast<SharedFile<ELFT>>(SS->File)->IsUsed = true; } if (auto *L = dyn_cast<Lazy>(S->body())) { // An undefined weak will not fetch archive members, but we have to remember @@ -309,7 +312,7 @@ static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding, if (Config->WarnCommon) warn("common " + S->body()->getName() + " is overridden"); return 1; - } else if (auto *R = dyn_cast<DefinedRegular<ELFT>>(B)) { + } else if (auto *R = dyn_cast<DefinedRegular>(B)) { if (R->Section == nullptr && Binding == STB_GLOBAL && IsAbsolute && R->Value == Value) return -1; @@ -319,7 +322,7 @@ static int compareDefinedNonCommon(Symbol *S, bool WasInserted, uint8_t Binding, template <class ELFT> Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size, - uint64_t Alignment, uint8_t Binding, + uint32_t Alignment, uint8_t Binding, uint8_t StOther, uint8_t Type, InputFile *File) { Symbol *S; @@ -349,40 +352,56 @@ Symbol *SymbolTable<ELFT>::addCommon(StringRef N, uint64_t Size, return S; } -static void print(const Twine &Msg) { +static void warnOrError(const Twine &Msg) { if (Config->AllowMultipleDefinition) warn(Msg); else error(Msg); } -static void reportDuplicate(SymbolBody *Existing, InputFile *NewFile) { - print("duplicate symbol " + conflictMsg(Existing, NewFile)); +static void reportDuplicate(SymbolBody *Sym, InputFile *NewFile) { + warnOrError("duplicate symbol: " + toString(*Sym) + + "\n>>> defined in " + toString(Sym->File) + + "\n>>> defined in " + toString(NewFile)); } 
template <class ELFT> -static void reportDuplicate(SymbolBody *Existing, - InputSectionBase<ELFT> *ErrSec, +static void reportDuplicate(SymbolBody *Sym, InputSectionBase *ErrSec, typename ELFT::uint ErrOffset) { - DefinedRegular<ELFT> *D = dyn_cast<DefinedRegular<ELFT>>(Existing); + DefinedRegular *D = dyn_cast<DefinedRegular>(Sym); if (!D || !D->Section || !ErrSec) { - reportDuplicate(Existing, ErrSec ? ErrSec->getFile() : nullptr); + reportDuplicate(Sym, ErrSec ? ErrSec->getFile<ELFT>() : nullptr); return; } - std::string OldLoc = D->Section->getLocation(D->Value); - std::string NewLoc = ErrSec->getLocation(ErrOffset); - - print(NewLoc + ": duplicate symbol '" + toString(*Existing) + "'"); - print(OldLoc + ": previous definition was here"); + // Construct and print an error message in the form of: + // + // ld.lld: error: duplicate symbol: foo + // >>> defined at bar.c:30 + // >>> bar.o (/home/alice/src/bar.o) + // >>> defined at baz.c:563 + // >>> baz.o in archive libbaz.a + auto *Sec1 = cast<InputSectionBase>(D->Section); + std::string Src1 = Sec1->getSrcMsg<ELFT>(D->Value); + std::string Obj1 = Sec1->getObjMsg<ELFT>(D->Value); + std::string Src2 = ErrSec->getSrcMsg<ELFT>(ErrOffset); + std::string Obj2 = ErrSec->getObjMsg<ELFT>(ErrOffset); + + std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at "; + if (!Src1.empty()) + Msg += Src1 + "\n>>> "; + Msg += Obj1 + "\n>>> defined at "; + if (!Src2.empty()) + Msg += Src2 + "\n>>> "; + Msg += Obj2; + warnOrError(Msg); } template <typename ELFT> Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t StOther, - uint8_t Type, uintX_t Value, uintX_t Size, - uint8_t Binding, - InputSectionBase<ELFT> *Section, - InputFile *File) { + uint8_t Type, uint64_t Value, + uint64_t Size, uint8_t Binding, + SectionBase *Section, InputFile *File) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name, Type, getVisibility(StOther), @@ -390,32 +409,16 @@ Symbol 
*SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t StOther, int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding, Section == nullptr, Value); if (Cmp > 0) - replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type, - Value, Size, Section, File); + replaceBody<DefinedRegular>(S, Name, /*IsLocal=*/false, StOther, Type, + Value, Size, Section, File); else if (Cmp == 0) - reportDuplicate(S->body(), Section, Value); + reportDuplicate<ELFT>(S->body(), + dyn_cast_or_null<InputSectionBase>(Section), Value); return S; } template <typename ELFT> -Symbol *SymbolTable<ELFT>::addSynthetic(StringRef N, - const OutputSectionBase *Section, - uintX_t Value, uint8_t StOther) { - Symbol *S; - bool WasInserted; - std::tie(S, WasInserted) = insert(N, STT_NOTYPE, getVisibility(StOther), - /*CanOmitFromDynSym*/ false, nullptr); - int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, STB_GLOBAL, - /*IsAbsolute*/ false, /*Value*/ 0); - if (Cmp > 0) - replaceBody<DefinedSynthetic>(S, N, Value, Section); - else if (Cmp == 0) - reportDuplicate(S->body(), nullptr); - return S; -} - -template <typename ELFT> -void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name, +void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *File, StringRef Name, const Elf_Sym &Sym, const typename ELFT::Verdef *Verdef) { // DSO symbols do not affect visibility in the output, so we pass STV_DEFAULT @@ -423,15 +426,21 @@ void SymbolTable<ELFT>::addShared(SharedFile<ELFT> *F, StringRef Name, // unchanged. Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = - insert(Name, Sym.getType(), STV_DEFAULT, /*CanOmitFromDynSym*/ true, F); + std::tie(S, WasInserted) = insert(Name, Sym.getType(), STV_DEFAULT, + /*CanOmitFromDynSym*/ true, File); // Make sure we preempt DSO symbols with default visibility. 
if (Sym.getVisibility() == STV_DEFAULT) S->ExportDynamic = true; - if (WasInserted || isa<Undefined<ELFT>>(S->body())) { - replaceBody<SharedSymbol<ELFT>>(S, F, Name, Sym, Verdef); + + SymbolBody *Body = S->body(); + // An undefined symbol with non default visibility must be satisfied + // in the same DSO. + if (WasInserted || + (isa<Undefined>(Body) && Body->getVisibility() == STV_DEFAULT)) { + replaceBody<SharedSymbol>(S, File, Name, Sym.st_other, Sym.getType(), &Sym, + Verdef); if (!S->isWeak()) - F->IsUsed = true; + File->IsUsed = true; } } @@ -446,8 +455,8 @@ Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, uint8_t Binding, int Cmp = compareDefinedNonCommon<ELFT>(S, WasInserted, Binding, /*IsAbs*/ false, /*Value*/ 0); if (Cmp > 0) - replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type, - 0, 0, nullptr, F); + replaceBody<DefinedRegular>(S, Name, /*IsLocal=*/false, StOther, Type, 0, 0, + nullptr, F); else if (Cmp == 0) reportDuplicate(S->body(), F); return S; diff --git a/ELF/SymbolTable.h b/ELF/SymbolTable.h index f39dbd1e2e18..a5395f5beaa1 100644 --- a/ELF/SymbolTable.h +++ b/ELF/SymbolTable.h @@ -19,7 +19,6 @@ namespace lld { namespace elf { class Lazy; -class OutputSectionBase; struct Symbol; // SymbolTable is a bucket of all known symbols, including defined, @@ -36,7 +35,6 @@ struct Symbol; // is one add* function per symbol type. 
template <class ELFT> class SymbolTable { typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; public: void addFile(InputFile *File); @@ -47,11 +45,11 @@ public: ArrayRef<BinaryFile *> getBinaryFiles() const { return BinaryFiles; } ArrayRef<SharedFile<ELFT> *> getSharedFiles() const { return SharedFiles; } - DefinedRegular<ELFT> *addAbsolute(StringRef Name, - uint8_t Visibility = llvm::ELF::STV_HIDDEN, - uint8_t Binding = llvm::ELF::STB_GLOBAL); - DefinedRegular<ELFT> *addIgnored(StringRef Name, - uint8_t Visibility = llvm::ELF::STV_HIDDEN); + DefinedRegular *addAbsolute(StringRef Name, + uint8_t Visibility = llvm::ELF::STV_HIDDEN, + uint8_t Binding = llvm::ELF::STB_GLOBAL); + DefinedRegular *addIgnored(StringRef Name, + uint8_t Visibility = llvm::ELF::STV_HIDDEN); Symbol *addUndefined(StringRef Name); Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding, @@ -59,11 +57,8 @@ public: InputFile *File); Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type, - uintX_t Value, uintX_t Size, uint8_t Binding, - InputSectionBase<ELFT> *Section, InputFile *File); - - Symbol *addSynthetic(StringRef N, const OutputSectionBase *Section, - uintX_t Value, uint8_t StOther); + uint64_t Value, uint64_t Size, uint8_t Binding, + SectionBase *Section, InputFile *File); void addShared(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym, const typename ELFT::Verdef *Verdef); @@ -73,10 +68,15 @@ public: Symbol *addBitcode(StringRef Name, uint8_t Binding, uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym, BitcodeFile *File); - Symbol *addCommon(StringRef N, uint64_t Size, uint64_t Alignment, + Symbol *addCommon(StringRef N, uint64_t Size, uint32_t Alignment, uint8_t Binding, uint8_t StOther, uint8_t Type, InputFile *File); + std::pair<Symbol *, bool> insert(StringRef Name); + std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Type, + uint8_t Visibility, bool CanOmitFromDynSym, + InputFile *File); + void scanUndefinedFlags(); void 
scanShlibUndefined(); void scanVersionScript(); @@ -87,14 +87,7 @@ public: void trace(StringRef Name); void wrap(StringRef Name); - std::vector<InputSectionBase<ELFT> *> Sections; - private: - std::pair<Symbol *, bool> insert(StringRef Name); - std::pair<Symbol *, bool> insert(StringRef Name, uint8_t Type, - uint8_t Visibility, bool CanOmitFromDynSym, - InputFile *File); - std::vector<SymbolBody *> findByVersion(SymbolVersion Ver); std::vector<SymbolBody *> findAllByVersion(SymbolVersion Ver); diff --git a/ELF/Symbols.cpp b/ELF/Symbols.cpp index 43af44ec4b84..86f3162cae29 100644 --- a/ELF/Symbols.cpp +++ b/ELF/Symbols.cpp @@ -28,62 +28,89 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; -template <class ELFT> -static typename ELFT::uint getSymVA(const SymbolBody &Body, - typename ELFT::uint &Addend) { - typedef typename ELFT::uint uintX_t; - +DefinedRegular *ElfSym::Bss; +DefinedRegular *ElfSym::Etext1; +DefinedRegular *ElfSym::Etext2; +DefinedRegular *ElfSym::Edata1; +DefinedRegular *ElfSym::Edata2; +DefinedRegular *ElfSym::End1; +DefinedRegular *ElfSym::End2; +DefinedRegular *ElfSym::MipsGp; +DefinedRegular *ElfSym::MipsGpDisp; +DefinedRegular *ElfSym::MipsLocalGp; + +static uint64_t getSymVA(const SymbolBody &Body, int64_t &Addend) { switch (Body.kind()) { - case SymbolBody::DefinedSyntheticKind: { - auto &D = cast<DefinedSynthetic>(Body); - const OutputSectionBase *Sec = D.Section; - if (!Sec) - return D.Value; - if (D.Value == uintX_t(-1)) - return Sec->Addr + Sec->Size; - return Sec->Addr + D.Value; - } case SymbolBody::DefinedRegularKind: { - auto &D = cast<DefinedRegular<ELFT>>(Body); - InputSectionBase<ELFT> *IS = D.Section; + auto &D = cast<DefinedRegular>(Body); + SectionBase *IS = D.Section; + if (auto *ISB = dyn_cast_or_null<InputSectionBase>(IS)) + IS = ISB->Repl; // According to the ELF spec reference to a local symbol from outside // the group are not allowed. 
Unfortunately .eh_frame breaks that rule // and must be treated specially. For now we just replace the symbol with // 0. - if (IS == &InputSection<ELFT>::Discarded) + if (IS == &InputSection::Discarded) return 0; // This is an absolute symbol. if (!IS) return D.Value; - uintX_t Offset = D.Value; + uint64_t Offset = D.Value; + + // An object in an SHF_MERGE section might be referenced via a + // section symbol (as a hack for reducing the number of local + // symbols). + // Depending on the addend, the reference via a section symbol + // refers to a different object in the merge section. + // Since the objects in the merge section are not necessarily + // contiguous in the output, the addend can thus affect the final + // VA in a non-linear way. + // To make this work, we incorporate the addend into the section + // offset (and zero out the addend for later processing) so that + // we find the right object in the section. if (D.isSection()) { Offset += Addend; Addend = 0; } - uintX_t VA = (IS->OutSec ? IS->OutSec->Addr : 0) + IS->getOffset(Offset); + + const OutputSection *OutSec = IS->getOutputSection(); + + // In the typical case, this is actually very simple and boils + // down to adding together 3 numbers: + // 1. The address of the output section. + // 2. The offset of the input section within the output section. + // 3. The offset within the input section (this addition happens + // inside InputSection::getOffset). + // + // If you understand the data structures involved with this next + // line (and how they get built), then you have a pretty good + // understanding of the linker. + uint64_t VA = (OutSec ? 
OutSec->Addr : 0) + IS->getOffset(Offset); + if (D.isTls() && !Config->Relocatable) { - if (!Out<ELFT>::TlsPhdr) + if (!Out::TlsPhdr) fatal(toString(D.File) + " has a STT_TLS symbol but doesn't have a PT_TLS section"); - return VA - Out<ELFT>::TlsPhdr->p_vaddr; + return VA - Out::TlsPhdr->p_vaddr; } return VA; } case SymbolBody::DefinedCommonKind: if (!Config->DefineCommon) return 0; - return In<ELFT>::Common->OutSec->Addr + In<ELFT>::Common->OutSecOff + + return InX::Common->OutSec->Addr + InX::Common->OutSecOff + cast<DefinedCommon>(Body).Offset; case SymbolBody::SharedKind: { - auto &SS = cast<SharedSymbol<ELFT>>(Body); - if (!SS.NeedsCopyOrPltAddr) - return 0; - if (SS.isFunc()) - return Body.getPltVA<ELFT>(); - return SS.getBssSectionForCopy()->Addr + SS.CopyOffset; + auto &SS = cast<SharedSymbol>(Body); + if (SS.NeedsCopy) + return SS.CopyRelSec->OutSec->Addr + SS.CopyRelSec->OutSecOff + + SS.CopyRelSecOff; + if (SS.NeedsPltAddr) + return Body.getPltVA(); + return 0; } case SymbolBody::UndefinedKind: return 0; @@ -97,10 +124,9 @@ static typename ELFT::uint getSymVA(const SymbolBody &Body, SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type) - : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal), + : SymbolKind(K), NeedsCopy(false), NeedsPltAddr(false), IsLocal(IsLocal), IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false), - IsInIgot(false), CopyIsInBssRelRo(false), Type(Type), StOther(StOther), - Name(Name) {} + IsInIgot(false), Type(Type), StOther(StOther), Name(Name) {} // Returns true if a symbol can be replaced at load-time by a symbol // with the same name defined in other ELF executable or DSO. @@ -112,7 +138,7 @@ bool SymbolBody::isPreemptible() const { // symbols with copy relocations (which resolve to .bss) or preempt plt // entries (which resolve to that plt entry). 
if (isShared()) - return !NeedsCopyOrPltAddr; + return !NeedsCopy && !NeedsPltAddr; // That's all that can be preempted in a non-DSO. if (!Config->Shared) @@ -132,65 +158,68 @@ bool SymbolBody::isPreemptible() const { return true; } -template <class ELFT> bool SymbolBody::hasThunk() const { - if (auto *DR = dyn_cast<DefinedRegular<ELFT>>(this)) - return DR->ThunkData != nullptr; - if (auto *S = dyn_cast<SharedSymbol<ELFT>>(this)) - return S->ThunkData != nullptr; - return false; -} - -template <class ELFT> -typename ELFT::uint SymbolBody::getVA(typename ELFT::uint Addend) const { - typename ELFT::uint OutVA = getSymVA<ELFT>(*this, Addend); +uint64_t SymbolBody::getVA(int64_t Addend) const { + uint64_t OutVA = getSymVA(*this, Addend); return OutVA + Addend; } template <class ELFT> typename ELFT::uint SymbolBody::getGotVA() const { - return In<ELFT>::Got->getVA() + getGotOffset<ELFT>(); + return In<ELFT>::Got->getVA() + getGotOffset(); } -template <class ELFT> typename ELFT::uint SymbolBody::getGotOffset() const { +uint64_t SymbolBody::getGotOffset() const { return GotIndex * Target->GotEntrySize; } -template <class ELFT> typename ELFT::uint SymbolBody::getGotPltVA() const { +uint64_t SymbolBody::getGotPltVA() const { if (this->IsInIgot) - return In<ELFT>::IgotPlt->getVA() + getGotPltOffset<ELFT>(); - return In<ELFT>::GotPlt->getVA() + getGotPltOffset<ELFT>(); + return InX::IgotPlt->getVA() + getGotPltOffset(); + return InX::GotPlt->getVA() + getGotPltOffset(); } -template <class ELFT> typename ELFT::uint SymbolBody::getGotPltOffset() const { +uint64_t SymbolBody::getGotPltOffset() const { return GotPltIndex * Target->GotPltEntrySize; } -template <class ELFT> typename ELFT::uint SymbolBody::getPltVA() const { +uint64_t SymbolBody::getPltVA() const { if (this->IsInIplt) - return In<ELFT>::Iplt->getVA() + PltIndex * Target->PltEntrySize; - return In<ELFT>::Plt->getVA() + Target->PltHeaderSize + + return InX::Iplt->getVA() + PltIndex * Target->PltEntrySize; + return 
InX::Plt->getVA() + Target->PltHeaderSize + PltIndex * Target->PltEntrySize; } -template <class ELFT> typename ELFT::uint SymbolBody::getThunkVA() const { - if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(this)) - return DR->ThunkData->getVA(); - if (const auto *S = dyn_cast<SharedSymbol<ELFT>>(this)) - return S->ThunkData->getVA(); - if (const auto *S = dyn_cast<Undefined<ELFT>>(this)) - return S->ThunkData->getVA(); - fatal("getThunkVA() not supported for Symbol class\n"); -} - template <class ELFT> typename ELFT::uint SymbolBody::getSize() const { if (const auto *C = dyn_cast<DefinedCommon>(this)) return C->Size; - if (const auto *DR = dyn_cast<DefinedRegular<ELFT>>(this)) + if (const auto *DR = dyn_cast<DefinedRegular>(this)) return DR->Size; - if (const auto *S = dyn_cast<SharedSymbol<ELFT>>(this)) - return S->Sym.st_size; + if (const auto *S = dyn_cast<SharedSymbol>(this)) + return S->getSize<ELFT>(); return 0; } +OutputSection *SymbolBody::getOutputSection() const { + if (auto *S = dyn_cast<DefinedRegular>(this)) { + if (S->Section) + return S->Section->getOutputSection(); + return nullptr; + } + + if (auto *S = dyn_cast<SharedSymbol>(this)) { + if (S->NeedsCopy) + return S->CopyRelSec->OutSec; + return nullptr; + } + + if (isa<DefinedCommon>(this)) { + if (Config->DefineCommon) + return InX::Common->OutSec; + return nullptr; + } + + return nullptr; +} + // If a symbol name contains '@', the characters after that is // a symbol version name. This function parses that. 
void SymbolBody::parseSymbolVersion() { @@ -234,27 +263,25 @@ Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type) : SymbolBody(K, Name, IsLocal, StOther, Type) {} -template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const { +template <class ELFT> bool DefinedRegular::isMipsPIC() const { if (!Section || !isFunc()) return false; return (this->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC || - (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC); + (cast<InputSectionBase>(Section) + ->template getFile<ELFT>() + ->getObj() + .getHeader() + ->e_flags & + EF_MIPS_PIC); } -template <typename ELFT> -Undefined<ELFT>::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, - uint8_t Type, InputFile *File) +Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type, InputFile *File) : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) { this->File = File; } -template <typename ELFT> -OutputSection<ELFT> *SharedSymbol<ELFT>::getBssSectionForCopy() const { - assert(needsCopy()); - return CopyIsInBssRelRo ? Out<ELFT>::BssRelRo : Out<ELFT>::Bss; -} - -DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment, +DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint32_t Alignment, uint8_t StOther, uint8_t Type, InputFile *File) : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther, Type), @@ -262,6 +289,17 @@ DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment, this->File = File; } +// If a shared symbol is referred via a copy relocation, its alignment +// becomes part of the ABI. This function returns a symbol alignment. +// Because symbols don't have alignment attributes, we need to infer that. 
+template <class ELFT> uint32_t SharedSymbol::getAlignment() const { + auto *File = cast<SharedFile<ELFT>>(this->File); + uint32_t SecAlign = File->getSection(getSym<ELFT>())->sh_addralign; + uint64_t SymValue = getSym<ELFT>().st_value; + uint32_t SymAlign = uint32_t(1) << countTrailingZeros(SymValue); + return std::min(SecAlign, SymAlign); +} + InputFile *Lazy::fetch() { if (auto *S = dyn_cast<LazyArchive>(this)) return S->fetch(); @@ -319,15 +357,15 @@ bool Symbol::includeInDynsym() const { // Print out a log message for --trace-symbol. void elf::printTraceSymbol(Symbol *Sym) { SymbolBody *B = Sym->body(); - outs() << toString(B->File); - + std::string S; if (B->isUndefined()) - outs() << ": reference to "; + S = ": reference to "; else if (B->isCommon()) - outs() << ": common definition of "; + S = ": common definition of "; else - outs() << ": definition of "; - outs() << B->getName() << "\n"; + S = ": definition of "; + + message(toString(B->File) + S + B->getName()); } // Returns a symbol for an error message. 
@@ -338,62 +376,22 @@ std::string lld::toString(const SymbolBody &B) { return B.getName(); } -template bool SymbolBody::hasThunk<ELF32LE>() const; -template bool SymbolBody::hasThunk<ELF32BE>() const; -template bool SymbolBody::hasThunk<ELF64LE>() const; -template bool SymbolBody::hasThunk<ELF64BE>() const; - -template uint32_t SymbolBody::template getVA<ELF32LE>(uint32_t) const; -template uint32_t SymbolBody::template getVA<ELF32BE>(uint32_t) const; -template uint64_t SymbolBody::template getVA<ELF64LE>(uint64_t) const; -template uint64_t SymbolBody::template getVA<ELF64BE>(uint64_t) const; - template uint32_t SymbolBody::template getGotVA<ELF32LE>() const; template uint32_t SymbolBody::template getGotVA<ELF32BE>() const; template uint64_t SymbolBody::template getGotVA<ELF64LE>() const; template uint64_t SymbolBody::template getGotVA<ELF64BE>() const; -template uint32_t SymbolBody::template getGotOffset<ELF32LE>() const; -template uint32_t SymbolBody::template getGotOffset<ELF32BE>() const; -template uint64_t SymbolBody::template getGotOffset<ELF64LE>() const; -template uint64_t SymbolBody::template getGotOffset<ELF64BE>() const; - -template uint32_t SymbolBody::template getGotPltVA<ELF32LE>() const; -template uint32_t SymbolBody::template getGotPltVA<ELF32BE>() const; -template uint64_t SymbolBody::template getGotPltVA<ELF64LE>() const; -template uint64_t SymbolBody::template getGotPltVA<ELF64BE>() const; - -template uint32_t SymbolBody::template getThunkVA<ELF32LE>() const; -template uint32_t SymbolBody::template getThunkVA<ELF32BE>() const; -template uint64_t SymbolBody::template getThunkVA<ELF64LE>() const; -template uint64_t SymbolBody::template getThunkVA<ELF64BE>() const; - -template uint32_t SymbolBody::template getGotPltOffset<ELF32LE>() const; -template uint32_t SymbolBody::template getGotPltOffset<ELF32BE>() const; -template uint64_t SymbolBody::template getGotPltOffset<ELF64LE>() const; -template uint64_t SymbolBody::template getGotPltOffset<ELF64BE>() 
const; - -template uint32_t SymbolBody::template getPltVA<ELF32LE>() const; -template uint32_t SymbolBody::template getPltVA<ELF32BE>() const; -template uint64_t SymbolBody::template getPltVA<ELF64LE>() const; -template uint64_t SymbolBody::template getPltVA<ELF64BE>() const; - template uint32_t SymbolBody::template getSize<ELF32LE>() const; template uint32_t SymbolBody::template getSize<ELF32BE>() const; template uint64_t SymbolBody::template getSize<ELF64LE>() const; template uint64_t SymbolBody::template getSize<ELF64BE>() const; -template class elf::Undefined<ELF32LE>; -template class elf::Undefined<ELF32BE>; -template class elf::Undefined<ELF64LE>; -template class elf::Undefined<ELF64BE>; - -template class elf::SharedSymbol<ELF32LE>; -template class elf::SharedSymbol<ELF32BE>; -template class elf::SharedSymbol<ELF64LE>; -template class elf::SharedSymbol<ELF64BE>; +template bool DefinedRegular::template isMipsPIC<ELF32LE>() const; +template bool DefinedRegular::template isMipsPIC<ELF32BE>() const; +template bool DefinedRegular::template isMipsPIC<ELF64LE>() const; +template bool DefinedRegular::template isMipsPIC<ELF64BE>() const; -template class elf::DefinedRegular<ELF32LE>; -template class elf::DefinedRegular<ELF32BE>; -template class elf::DefinedRegular<ELF64LE>; -template class elf::DefinedRegular<ELF64BE>; +template uint32_t SharedSymbol::template getAlignment<ELF32LE>() const; +template uint32_t SharedSymbol::template getAlignment<ELF32BE>() const; +template uint32_t SharedSymbol::template getAlignment<ELF64LE>() const; +template uint32_t SharedSymbol::template getAlignment<ELF64BE>() const; diff --git a/ELF/Symbols.h b/ELF/Symbols.h index 7acb89ad0718..39a0c0f7b4df 100644 --- a/ELF/Symbols.h +++ b/ELF/Symbols.h @@ -30,8 +30,7 @@ class BitcodeFile; class InputFile; class LazyObjectFile; template <class ELFT> class ObjectFile; -template <class ELFT> class OutputSection; -class OutputSectionBase; +class OutputSection; template <class ELFT> class SharedFile; 
struct Symbol; @@ -44,8 +43,7 @@ public: DefinedRegularKind = DefinedFirst, SharedKind, DefinedCommonKind, - DefinedSyntheticKind, - DefinedLast = DefinedSyntheticKind, + DefinedLast = DefinedCommonKind, UndefinedKind, LazyArchiveKind, LazyObjectKind, @@ -76,18 +74,16 @@ public: bool isInGot() const { return GotIndex != -1U; } bool isInPlt() const { return PltIndex != -1U; } - template <class ELFT> bool hasThunk() const; - template <class ELFT> - typename ELFT::uint getVA(typename ELFT::uint Addend = 0) const; + uint64_t getVA(int64_t Addend = 0) const; - template <class ELFT> typename ELFT::uint getGotOffset() const; + uint64_t getGotOffset() const; template <class ELFT> typename ELFT::uint getGotVA() const; - template <class ELFT> typename ELFT::uint getGotPltOffset() const; - template <class ELFT> typename ELFT::uint getGotPltVA() const; - template <class ELFT> typename ELFT::uint getPltVA() const; - template <class ELFT> typename ELFT::uint getThunkVA() const; + uint64_t getGotPltOffset() const; + uint64_t getGotPltVA() const; + uint64_t getPltVA() const; template <class ELFT> typename ELFT::uint getSize() const; + OutputSection *getOutputSection() const; // The file from which this symbol was created. InputFile *File = nullptr; @@ -105,9 +101,13 @@ protected: const unsigned SymbolKind : 8; public: - // True if the linker has to generate a copy relocation for this shared - // symbol or if the symbol should point to its plt entry. - unsigned NeedsCopyOrPltAddr : 1; + // True if the linker has to generate a copy relocation. + // For SharedSymbol only. + unsigned NeedsCopy : 1; + + // True the symbol should point to its PLT entry. + // For SharedSymbol only. + unsigned NeedsPltAddr : 1; // True if this is a local symbol. unsigned IsLocal : 1; @@ -124,11 +124,6 @@ public: // True if this symbol is in the Igot sub-section of the .got.plt or .got. unsigned IsInIgot : 1; - // True if this is a shared symbol in a read-only segment which requires a - // copy relocation. 
This causes space for the symbol to be allocated in the - // .bss.rel.ro section. - unsigned CopyIsInBssRelRo : 1; - // The following fields have the same meaning as the ELF symbol attributes. uint8_t Type; // symbol type uint8_t StOther; // st_other field value @@ -160,7 +155,7 @@ public: class DefinedCommon : public Defined { public: - DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, uint8_t StOther, + DefinedCommon(StringRef N, uint64_t Size, uint32_t Alignment, uint8_t StOther, uint8_t Type, InputFile *File); static bool classof(const SymbolBody *S) { @@ -172,77 +167,35 @@ public: uint64_t Offset; // The maximum alignment we have seen for this symbol. - uint64_t Alignment; + uint32_t Alignment; uint64_t Size; }; // Regular defined symbols read from object file symbol tables. -template <class ELFT> class DefinedRegular : public Defined { - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; - +class DefinedRegular : public Defined { public: DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, - uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section, + uint64_t Value, uint64_t Size, SectionBase *Section, InputFile *File) : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type), - Value(Value), Size(Size), - Section(Section ? Section->Repl : NullInputSection) { + Value(Value), Size(Size), Section(Section) { this->File = File; } // Return true if the symbol is a PIC function. - bool isMipsPIC() const; + template <class ELFT> bool isMipsPIC() const; static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::DefinedRegularKind; } - uintX_t Value; - uintX_t Size; - - // The input section this symbol belongs to. Notice that this is - // a reference to a pointer. We are using two levels of indirections - // because of ICF. If ICF decides two sections need to be merged, it - // manipulates this Section pointers so that they point to the same - // section. 
This is a bit tricky, so be careful to not be confused. - // If this is null, the symbol is an absolute symbol. - InputSectionBase<ELFT> *&Section; - - // If non-null the symbol has a Thunk that may be used as an alternative - // destination for callers of this Symbol. - Thunk<ELFT> *ThunkData = nullptr; - -private: - static InputSectionBase<ELFT> *NullInputSection; -}; - -template <class ELFT> -InputSectionBase<ELFT> *DefinedRegular<ELFT>::NullInputSection; - -// DefinedSynthetic is a class to represent linker-generated ELF symbols. -// The difference from the regular symbol is that DefinedSynthetic symbols -// don't belong to any input files or sections. Thus, its constructor -// takes an output section to calculate output VA, etc. -// If Section is null, this symbol is relative to the image base. -class DefinedSynthetic : public Defined { -public: - DefinedSynthetic(StringRef Name, uint64_t Value, - const OutputSectionBase *Section) - : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false, - llvm::ELF::STV_HIDDEN, 0 /* Type */), - Value(Value), Section(Section) {} - - static bool classof(const SymbolBody *S) { - return S->kind() == SymbolBody::DefinedSyntheticKind; - } - uint64_t Value; - const OutputSectionBase *Section; + uint64_t Size; + SectionBase *Section; }; -template <class ELFT> class Undefined : public SymbolBody { +class Undefined : public SymbolBody { public: Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, InputFile *F); @@ -250,53 +203,51 @@ public: static bool classof(const SymbolBody *S) { return S->kind() == UndefinedKind; } - - // If non-null the symbol has a Thunk that may be used as an alternative - // destination for callers of this Symbol. When linking a DSO undefined - // symbols are implicitly imported, the symbol lookup will be performed by - // the dynamic loader. A call to an undefined symbol will be given a PLT - // entry and on ARM this may need a Thunk if the caller is in Thumb state. 
- Thunk<ELFT> *ThunkData = nullptr; - InputFile *file() { return this->File; } }; -template <class ELFT> class SharedSymbol : public Defined { - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::Verdef Elf_Verdef; - typedef typename ELFT::uint uintX_t; - +class SharedSymbol : public Defined { public: static bool classof(const SymbolBody *S) { return S->kind() == SymbolBody::SharedKind; } - SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym, - const Elf_Verdef *Verdef) - : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other, - Sym.getType()), - Sym(Sym), Verdef(Verdef) { + SharedSymbol(InputFile *File, StringRef Name, uint8_t StOther, uint8_t Type, + const void *ElfSym, const void *Verdef) + : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, StOther, Type), + Verdef(Verdef), ElfSym(ElfSym) { // IFuncs defined in DSOs are treated as functions by the static linker. if (isGnuIFunc()) Type = llvm::ELF::STT_FUNC; - this->File = F; + this->File = File; } - SharedFile<ELFT> *file() { return (SharedFile<ELFT> *)this->File; } + template <class ELFT> uint64_t getShndx() const { + return getSym<ELFT>().st_shndx; + } - const Elf_Sym &Sym; + template <class ELFT> uint64_t getValue() const { + return getSym<ELFT>().st_value; + } + + template <class ELFT> uint64_t getSize() const { + return getSym<ELFT>().st_size; + } + + template <class ELFT> uint32_t getAlignment() const; // This field is a pointer to the symbol's version definition. - const Elf_Verdef *Verdef; + const void *Verdef; - // CopyOffset is significant only when needsCopy() is true. - uintX_t CopyOffset = 0; + // CopyRelSec and CopyRelSecOff are significant only when NeedsCopy is true. + InputSection *CopyRelSec; + uint64_t CopyRelSecOff; - // If non-null the symbol has a Thunk that may be used as an alternative - // destination for callers of this Symbol. 
- Thunk<ELFT> *ThunkData = nullptr; - bool needsCopy() const { return this->NeedsCopyOrPltAddr && !this->isFunc(); } +private: + template <class ELFT> const typename ELFT::Sym &getSym() const { + return *(const typename ELFT::Sym *)ElfSym; + } - OutputSection<ELFT> *getBssSectionForCopy() const; + const void *ElfSym; }; // This class represents a symbol defined in an archive file. It is @@ -350,39 +301,28 @@ public: // Some linker-generated symbols need to be created as // DefinedRegular symbols. -template <class ELFT> struct ElfSym { - // The content for __ehdr_start symbol. - static DefinedRegular<ELFT> *EhdrStart; - - // The content for _etext and etext symbols. - static DefinedRegular<ELFT> *Etext; - static DefinedRegular<ELFT> *Etext2; - - // The content for _edata and edata symbols. - static DefinedRegular<ELFT> *Edata; - static DefinedRegular<ELFT> *Edata2; - - // The content for _end and end symbols. - static DefinedRegular<ELFT> *End; - static DefinedRegular<ELFT> *End2; - - // The content for _gp_disp/__gnu_local_gp symbols for MIPS target. - static DefinedRegular<ELFT> *MipsGpDisp; - static DefinedRegular<ELFT> *MipsLocalGp; - static DefinedRegular<ELFT> *MipsGp; +struct ElfSym { + // __bss_start + static DefinedRegular *Bss; + + // etext and _etext + static DefinedRegular *Etext1; + static DefinedRegular *Etext2; + + // edata and _edata + static DefinedRegular *Edata1; + static DefinedRegular *Edata2; + + // end and _end + static DefinedRegular *End1; + static DefinedRegular *End2; + + // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS. 
+ static DefinedRegular *MipsGp; + static DefinedRegular *MipsGpDisp; + static DefinedRegular *MipsLocalGp; }; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::EhdrStart; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Etext2; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::Edata2; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::End2; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGpDisp; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsLocalGp; -template <class ELFT> DefinedRegular<ELFT> *ElfSym<ELFT>::MipsGp; - // A real symbol object, SymbolBody, is usually stored within a Symbol. There's // always one Symbol for each symbol name. The resolver updates the SymbolBody // stored in the Body field of this object as it resolves symbols. Symbol also @@ -426,13 +366,9 @@ struct Symbol { // This field is used to store the Symbol's SymbolBody. This instantiation of // AlignedCharArrayUnion gives us a struct with a char array field that is - // large and aligned enough to store any derived class of SymbolBody. We - // assume that the size and alignment of ELF64LE symbols is sufficient for any - // ELFT, and we verify this with the static_asserts in replaceBody. - llvm::AlignedCharArrayUnion< - DefinedCommon, DefinedRegular<llvm::object::ELF64LE>, DefinedSynthetic, - Undefined<llvm::object::ELF64LE>, SharedSymbol<llvm::object::ELF64LE>, - LazyArchive, LazyObject> + // large and aligned enough to store any derived class of SymbolBody. 
+ llvm::AlignedCharArrayUnion<DefinedCommon, DefinedRegular, Undefined, + SharedSymbol, LazyArchive, LazyObject> Body; SymbolBody *body() { return reinterpret_cast<SymbolBody *>(Body.buffer); } diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp index b673a4ece1d2..7009d3d34f66 100644 --- a/ELF/SyntheticSections.cpp +++ b/ELF/SyntheticSections.cpp @@ -27,6 +27,8 @@ #include "Threads.h" #include "Writer.h" #include "lld/Config/Version.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MD5.h" @@ -45,6 +47,12 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; +uint64_t SyntheticSection::getVA() const { + if (this->OutSec) + return this->OutSec->Addr + this->OutSecOff; + return 0; +} + template <class ELFT> static std::vector<DefinedCommon *> getCommonSymbols() { std::vector<DefinedCommon *> V; for (Symbol *S : Symtab<ELFT>::X->getSymbols()) @@ -54,35 +62,24 @@ template <class ELFT> static std::vector<DefinedCommon *> getCommonSymbols() { } // Find all common symbols and allocate space for them. -template <class ELFT> InputSection<ELFT> *elf::createCommonSection() { - auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 1, - ArrayRef<uint8_t>(), "COMMON"); - Ret->Live = true; - +template <class ELFT> InputSection *elf::createCommonSection() { if (!Config->DefineCommon) - return Ret; + return nullptr; // Sort the common symbols by alignment as an heuristic to pack them better. std::vector<DefinedCommon *> Syms = getCommonSymbols<ELFT>(); + if (Syms.empty()) + return nullptr; + std::stable_sort(Syms.begin(), Syms.end(), [](const DefinedCommon *A, const DefinedCommon *B) { return A->Alignment > B->Alignment; }); - // Assign offsets to symbols. 
- size_t Size = 0; - size_t Alignment = 1; - for (DefinedCommon *Sym : Syms) { - Alignment = std::max<size_t>(Alignment, Sym->Alignment); - Size = alignTo(Size, Sym->Alignment); - - // Compute symbol offset relative to beginning of input section. - Sym->Offset = Size; - Size += Sym->Size; - } - Ret->Alignment = Alignment; - Ret->Data = makeArrayRef<uint8_t>(nullptr, Size); - return Ret; + BssSection *Sec = make<BssSection>("COMMON"); + for (DefinedCommon *Sym : Syms) + Sym->Offset = Sec->reserveSpace(Sym->Size, Sym->Alignment); + return Sec; } // Returns an LLD version string. @@ -102,14 +99,15 @@ static ArrayRef<uint8_t> getVersion() { // With this feature, you can identify LLD-generated binaries easily // by "objdump -s -j .comment <file>". // The returned object is a mergeable string section. -template <class ELFT> MergeInputSection<ELFT> *elf::createCommentSection() { +template <class ELFT> MergeInputSection *elf::createCommentSection() { typename ELFT::Shdr Hdr = {}; Hdr.sh_flags = SHF_MERGE | SHF_STRINGS; Hdr.sh_type = SHT_PROGBITS; Hdr.sh_entsize = 1; Hdr.sh_addralign = 1; - auto *Ret = make<MergeInputSection<ELFT>>(/*file=*/nullptr, &Hdr, ".comment"); + auto *Ret = + make<MergeInputSection>((ObjectFile<ELFT> *)nullptr, &Hdr, ".comment"); Ret->Data = getVersion(); Ret->splitIntoPieces(); return Ret; @@ -118,8 +116,10 @@ template <class ELFT> MergeInputSection<ELFT> *elf::createCommentSection() { // .MIPS.abiflags section. 
template <class ELFT> MipsAbiFlagsSection<ELFT>::MipsAbiFlagsSection(Elf_Mips_ABIFlags Flags) - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), - Flags(Flags) {} + : SyntheticSection(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"), + Flags(Flags) { + this->Entsize = sizeof(Elf_Mips_ABIFlags); +} template <class ELFT> void MipsAbiFlagsSection<ELFT>::writeTo(uint8_t *Buf) { memcpy(Buf, &Flags, sizeof(Flags)); @@ -130,13 +130,13 @@ MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { Elf_Mips_ABIFlags Flags = {}; bool Create = false; - for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { - if (!Sec->Live || Sec->Type != SHT_MIPS_ABIFLAGS) + for (InputSectionBase *Sec : InputSections) { + if (Sec->Type != SHT_MIPS_ABIFLAGS) continue; Sec->Live = false; Create = true; - std::string Filename = toString(Sec->getFile()); + std::string Filename = toString(Sec->getFile<ELFT>()); const size_t Size = Sec->Data.size(); // Older version of BFD (such as the default FreeBSD linker) concatenate // .MIPS.abiflags instead of merging. To allow for this case (or potential @@ -175,8 +175,10 @@ MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() { // .MIPS.options section. 
template <class ELFT> MipsOptionsSection<ELFT>::MipsOptionsSection(Elf_Mips_RegInfo Reginfo) - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), - Reginfo(Reginfo) {} + : SyntheticSection(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"), + Reginfo(Reginfo) { + this->Entsize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); +} template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *Buf) { auto *Options = reinterpret_cast<Elf_Mips_Options *>(Buf); @@ -197,13 +199,13 @@ MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { Elf_Mips_RegInfo Reginfo = {}; bool Create = false; - for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { - if (!Sec->Live || Sec->Type != SHT_MIPS_OPTIONS) + for (InputSectionBase *Sec : InputSections) { + if (Sec->Type != SHT_MIPS_OPTIONS) continue; Sec->Live = false; Create = true; - std::string Filename = toString(Sec->getFile()); + std::string Filename = toString(Sec->getFile<ELFT>()); ArrayRef<uint8_t> D = Sec->Data; while (!D.empty()) { @@ -217,7 +219,7 @@ MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { if (Config->Relocatable && Opt->getRegInfo().ri_gp_value) error(Filename + ": unsupported non-zero ri_gp_value"); Reginfo.ri_gprmask |= Opt->getRegInfo().ri_gprmask; - Sec->getFile()->MipsGp0 = Opt->getRegInfo().ri_gp_value; + Sec->getFile<ELFT>()->MipsGp0 = Opt->getRegInfo().ri_gp_value; break; } @@ -235,8 +237,10 @@ MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { // MIPS .reginfo section. 
template <class ELFT> MipsReginfoSection<ELFT>::MipsReginfoSection(Elf_Mips_RegInfo Reginfo) - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), - Reginfo(Reginfo) {} + : SyntheticSection(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"), + Reginfo(Reginfo) { + this->Entsize = sizeof(Elf_Mips_RegInfo); +} template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *Buf) { if (!Config->Relocatable) @@ -253,22 +257,24 @@ MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { Elf_Mips_RegInfo Reginfo = {}; bool Create = false; - for (InputSectionBase<ELFT> *Sec : Symtab<ELFT>::X->Sections) { - if (!Sec->Live || Sec->Type != SHT_MIPS_REGINFO) + for (InputSectionBase *Sec : InputSections) { + if (Sec->Type != SHT_MIPS_REGINFO) continue; Sec->Live = false; Create = true; if (Sec->Data.size() != sizeof(Elf_Mips_RegInfo)) { - error(toString(Sec->getFile()) + ": invalid size of .reginfo section"); + error(toString(Sec->getFile<ELFT>()) + + ": invalid size of .reginfo section"); return nullptr; } auto *R = reinterpret_cast<const Elf_Mips_RegInfo *>(Sec->Data.data()); if (Config->Relocatable && R->ri_gp_value) - error(toString(Sec->getFile()) + ": unsupported non-zero ri_gp_value"); + error(toString(Sec->getFile<ELFT>()) + + ": unsupported non-zero ri_gp_value"); Reginfo.ri_gprmask |= R->ri_gprmask; - Sec->getFile()->MipsGp0 = R->ri_gp_value; + Sec->getFile<ELFT>()->MipsGp0 = R->ri_gp_value; }; if (Create) @@ -276,15 +282,25 @@ MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { return nullptr; } -template <class ELFT> InputSection<ELFT> *elf::createInterpSection() { - auto *Ret = make<InputSection<ELFT>>(SHF_ALLOC, SHT_PROGBITS, 1, - ArrayRef<uint8_t>(), ".interp"); - Ret->Live = true; - +InputSection *elf::createInterpSection() { // StringSaver guarantees that the returned string ends with '\0'. 
StringRef S = Saver.save(Config->DynamicLinker); - Ret->Data = {(const uint8_t *)S.data(), S.size() + 1}; - return Ret; + ArrayRef<uint8_t> Contents = {(const uint8_t *)S.data(), S.size() + 1}; + + auto *Sec = + make<InputSection>(SHF_ALLOC, SHT_PROGBITS, 1, Contents, ".interp"); + Sec->Live = true; + return Sec; +} + +template <class ELFT> +SymbolBody *elf::addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value, + uint64_t Size, InputSectionBase *Section) { + auto *S = make<DefinedRegular>(Name, /*IsLocal*/ true, STV_DEFAULT, Type, + Value, Size, Section, nullptr); + if (In<ELFT>::SymTab) + In<ELFT>::SymTab->addSymbol(S); + return S; } static size_t getHashSize() { @@ -303,16 +319,15 @@ static size_t getHashSize() { } } -template <class ELFT> -BuildIdSection<ELFT>::BuildIdSection() - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_NOTE, 1, ".note.gnu.build-id"), +BuildIdSection::BuildIdSection() + : SyntheticSection(SHF_ALLOC, SHT_NOTE, 1, ".note.gnu.build-id"), HashSize(getHashSize()) {} -template <class ELFT> void BuildIdSection<ELFT>::writeTo(uint8_t *Buf) { - const endianness E = ELFT::TargetEndianness; - write32<E>(Buf, 4); // Name size - write32<E>(Buf + 4, HashSize); // Content size - write32<E>(Buf + 8, NT_GNU_BUILD_ID); // Type +void BuildIdSection::writeTo(uint8_t *Buf) { + endianness E = Config->Endianness; + write32(Buf, 4, E); // Name size + write32(Buf + 4, HashSize, E); // Content size + write32(Buf + 8, NT_GNU_BUILD_ID, E); // Type memcpy(Buf + 12, "GNU", 4); // Name string HashBuf = Buf + 16; } @@ -334,23 +349,33 @@ static std::vector<ArrayRef<uint8_t>> split(ArrayRef<uint8_t> Arr, // In order to utilize multiple cores, we first split data into 1MB // chunks, compute a hash for each chunk, and then compute a hash value // of the hash values. 
-template <class ELFT> -void BuildIdSection<ELFT>::computeHash( +void BuildIdSection::computeHash( llvm::ArrayRef<uint8_t> Data, std::function<void(uint8_t *Dest, ArrayRef<uint8_t> Arr)> HashFn) { std::vector<ArrayRef<uint8_t>> Chunks = split(Data, 1024 * 1024); std::vector<uint8_t> Hashes(Chunks.size() * HashSize); // Compute hash values. - forLoop(0, Chunks.size(), - [&](size_t I) { HashFn(Hashes.data() + I * HashSize, Chunks[I]); }); + parallelFor(0, Chunks.size(), [&](size_t I) { + HashFn(Hashes.data() + I * HashSize, Chunks[I]); + }); // Write to the final output buffer. HashFn(HashBuf, Hashes); } -template <class ELFT> -void BuildIdSection<ELFT>::writeBuildId(ArrayRef<uint8_t> Buf) { +BssSection::BssSection(StringRef Name) + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 0, Name) {} + +size_t BssSection::reserveSpace(uint64_t Size, uint32_t Alignment) { + if (OutSec) + OutSec->updateAlignment(Alignment); + this->Size = alignTo(this->Size, Alignment) + Size; + this->Alignment = std::max(this->Alignment, Alignment); + return this->Size - Size; +} + +void BuildIdSection::writeBuildId(ArrayRef<uint8_t> Buf) { switch (Config->BuildId) { case BuildIdKind::Fast: computeHash(Buf, [](uint8_t *Dest, ArrayRef<uint8_t> Arr) { @@ -380,9 +405,216 @@ void BuildIdSection<ELFT>::writeBuildId(ArrayRef<uint8_t> Buf) { } template <class ELFT> +EhFrameSection<ELFT>::EhFrameSection() + : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {} + +// Search for an existing CIE record or create a new one. +// CIE records from input object files are uniquified by their contents +// and where their relocations point to. 
+template <class ELFT> +template <class RelTy> +CieRecord *EhFrameSection<ELFT>::addCie(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection>(Piece.ID); + const endianness E = ELFT::TargetEndianness; + if (read32<E>(Piece.data().data() + 4) != 0) + fatal(toString(Sec) + ": CIE expected at beginning of .eh_frame"); + + SymbolBody *Personality = nullptr; + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI != (unsigned)-1) + Personality = + &Sec->template getFile<ELFT>()->getRelocTargetSym(Rels[FirstRelI]); + + // Search for an existing CIE by CIE contents/relocation target pair. + CieRecord *Cie = &CieMap[{Piece.data(), Personality}]; + + // If not found, create a new one. + if (Cie->Piece == nullptr) { + Cie->Piece = &Piece; + Cies.push_back(Cie); + } + return Cie; +} + +// There is one FDE per function. Returns true if a given FDE +// points to a live function. +template <class ELFT> +template <class RelTy> +bool EhFrameSection<ELFT>::isFdeLive(EhSectionPiece &Piece, + ArrayRef<RelTy> Rels) { + auto *Sec = cast<EhInputSection>(Piece.ID); + unsigned FirstRelI = Piece.FirstRelocation; + if (FirstRelI == (unsigned)-1) + return false; + const RelTy &Rel = Rels[FirstRelI]; + SymbolBody &B = Sec->template getFile<ELFT>()->getRelocTargetSym(Rel); + auto *D = dyn_cast<DefinedRegular>(&B); + if (!D || !D->Section) + return false; + auto *Target = + cast<InputSectionBase>(cast<InputSectionBase>(D->Section)->Repl); + return Target && Target->Live; +} + +// .eh_frame is a sequence of CIE or FDE records. In general, there +// is one CIE record per input object file which is followed by +// a list of FDEs. This function searches an existing CIE or create a new +// one and associates FDEs to the CIE. 
+template <class ELFT> +template <class RelTy> +void EhFrameSection<ELFT>::addSectionAux(EhInputSection *Sec, + ArrayRef<RelTy> Rels) { + const endianness E = ELFT::TargetEndianness; + + DenseMap<size_t, CieRecord *> OffsetToCie; + for (EhSectionPiece &Piece : Sec->Pieces) { + // The empty record is the end marker. + if (Piece.size() == 4) + return; + + size_t Offset = Piece.InputOff; + uint32_t ID = read32<E>(Piece.data().data() + 4); + if (ID == 0) { + OffsetToCie[Offset] = addCie(Piece, Rels); + continue; + } + + uint32_t CieOffset = Offset + 4 - ID; + CieRecord *Cie = OffsetToCie[CieOffset]; + if (!Cie) + fatal(toString(Sec) + ": invalid CIE reference"); + + if (!isFdeLive(Piece, Rels)) + continue; + Cie->FdePieces.push_back(&Piece); + NumFdes++; + } +} + +template <class ELFT> +void EhFrameSection<ELFT>::addSection(InputSectionBase *C) { + auto *Sec = cast<EhInputSection>(C); + Sec->EHSec = this; + updateAlignment(Sec->Alignment); + Sections.push_back(Sec); + for (auto *DS : Sec->DependentSections) + DependentSections.push_back(DS); + + // .eh_frame is a sequence of CIE or FDE records. This function + // splits it into pieces so that we can call + // SplitInputSection::getSectionPiece on the section. + Sec->split<ELFT>(); + if (Sec->Pieces.empty()) + return; + + if (Sec->NumRelocations) { + if (Sec->AreRelocsRela) + addSectionAux(Sec, Sec->template relas<ELFT>()); + else + addSectionAux(Sec, Sec->template rels<ELFT>()); + return; + } + addSectionAux(Sec, makeArrayRef<Elf_Rela>(nullptr, nullptr)); +} + +template <class ELFT> +static void writeCieFde(uint8_t *Buf, ArrayRef<uint8_t> D) { + memcpy(Buf, D.data(), D.size()); + + // Fix the size field. -4 since size does not include the size field itself. + const endianness E = ELFT::TargetEndianness; + write32<E>(Buf, alignTo(D.size(), sizeof(typename ELFT::uint)) - 4); +} + +template <class ELFT> void EhFrameSection<ELFT>::finalizeContents() { + if (this->Size) + return; // Already finalized. 
+ + size_t Off = 0; + for (CieRecord *Cie : Cies) { + Cie->Piece->OutputOff = Off; + Off += alignTo(Cie->Piece->size(), Config->Wordsize); + + for (EhSectionPiece *Fde : Cie->FdePieces) { + Fde->OutputOff = Off; + Off += alignTo(Fde->size(), Config->Wordsize); + } + } + this->Size = Off; +} + +template <class ELFT> static uint64_t readFdeAddr(uint8_t *Buf, int Size) { + const endianness E = ELFT::TargetEndianness; + switch (Size) { + case DW_EH_PE_udata2: + return read16<E>(Buf); + case DW_EH_PE_udata4: + return read32<E>(Buf); + case DW_EH_PE_udata8: + return read64<E>(Buf); + case DW_EH_PE_absptr: + if (ELFT::Is64Bits) + return read64<E>(Buf); + return read32<E>(Buf); + } + fatal("unknown FDE size encoding"); +} + +// Returns the VA to which a given FDE (on a mmap'ed buffer) is applied to. +// We need it to create .eh_frame_hdr section. +template <class ELFT> +uint64_t EhFrameSection<ELFT>::getFdePc(uint8_t *Buf, size_t FdeOff, + uint8_t Enc) { + // The starting address to which this FDE applies is + // stored at FDE + 8 byte. + size_t Off = FdeOff + 8; + uint64_t Addr = readFdeAddr<ELFT>(Buf + Off, Enc & 0x7); + if ((Enc & 0x70) == DW_EH_PE_absptr) + return Addr; + if ((Enc & 0x70) == DW_EH_PE_pcrel) + return Addr + this->OutSec->Addr + Off; + fatal("unknown FDE size relative encoding"); +} + +template <class ELFT> void EhFrameSection<ELFT>::writeTo(uint8_t *Buf) { + const endianness E = ELFT::TargetEndianness; + for (CieRecord *Cie : Cies) { + size_t CieOffset = Cie->Piece->OutputOff; + writeCieFde<ELFT>(Buf + CieOffset, Cie->Piece->data()); + + for (EhSectionPiece *Fde : Cie->FdePieces) { + size_t Off = Fde->OutputOff; + writeCieFde<ELFT>(Buf + Off, Fde->data()); + + // FDE's second word should have the offset to an associated CIE. + // Write it. + write32<E>(Buf + Off + 4, Off + 4 - CieOffset); + } + } + + for (EhInputSection *S : Sections) + S->template relocate<ELFT>(Buf, nullptr); + + // Construct .eh_frame_hdr. 
.eh_frame_hdr is a binary search table + // to get a FDE from an address to which FDE is applied. So here + // we obtain two addresses and pass them to EhFrameHdr object. + if (In<ELFT>::EhFrameHdr) { + for (CieRecord *Cie : Cies) { + uint8_t Enc = getFdeEncoding<ELFT>(Cie->Piece); + for (SectionPiece *Fde : Cie->FdePieces) { + uint64_t Pc = getFdePc(Buf, Fde->OutputOff, Enc); + uint64_t FdeVA = this->OutSec->Addr + Fde->OutputOff; + In<ELFT>::EhFrameHdr->addFde(Pc, FdeVA); + } + } + } +} + +template <class ELFT> GotSection<ELFT>::GotSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - Target->GotEntrySize, ".got") {} + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotEntrySize, ".got") {} template <class ELFT> void GotSection<ELFT>::addEntry(SymbolBody &Sym) { Sym.GotIndex = NumEntries; @@ -403,25 +635,23 @@ template <class ELFT> bool GotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { template <class ELFT> bool GotSection<ELFT>::addTlsIndex() { if (TlsIndexOff != uint32_t(-1)) return false; - TlsIndexOff = NumEntries * sizeof(uintX_t); + TlsIndexOff = NumEntries * Config->Wordsize; NumEntries += 2; return true; } template <class ELFT> -typename GotSection<ELFT>::uintX_t -GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const { - return this->getVA() + B.GlobalDynIndex * sizeof(uintX_t); +uint64_t GotSection<ELFT>::getGlobalDynAddr(const SymbolBody &B) const { + return this->getVA() + B.GlobalDynIndex * Config->Wordsize; } template <class ELFT> -typename GotSection<ELFT>::uintX_t -GotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { - return B.GlobalDynIndex * sizeof(uintX_t); +uint64_t GotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { + return B.GlobalDynIndex * Config->Wordsize; } -template <class ELFT> void GotSection<ELFT>::finalize() { - Size = NumEntries * sizeof(uintX_t); +template <class ELFT> void GotSection<ELFT>::finalizeContents() { + Size = NumEntries * Config->Wordsize; } 
template <class ELFT> bool GotSection<ELFT>::empty() const { @@ -431,17 +661,14 @@ template <class ELFT> bool GotSection<ELFT>::empty() const { } template <class ELFT> void GotSection<ELFT>::writeTo(uint8_t *Buf) { - this->relocate(Buf, Buf + Size); + this->template relocate<ELFT>(Buf, Buf + Size); } -template <class ELFT> -MipsGotSection<ELFT>::MipsGotSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, - SHT_PROGBITS, 16, ".got") {} +MipsGotSection::MipsGotSection() + : SyntheticSection(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, SHT_PROGBITS, 16, + ".got") {} -template <class ELFT> -void MipsGotSection<ELFT>::addEntry(SymbolBody &Sym, uintX_t Addend, - RelExpr Expr) { +void MipsGotSection::addEntry(SymbolBody &Sym, int64_t Addend, RelExpr Expr) { // For "true" local symbols which can be referenced from the same module // only compiler creates two instructions for address loading: // @@ -472,7 +699,8 @@ void MipsGotSection<ELFT>::addEntry(SymbolBody &Sym, uintX_t Addend, // sections referenced by GOT relocations. Then later in the `finalize` // method calculate number of "pages" required to cover all saved output // section and allocate appropriate number of GOT entries. 
- PageIndexMap.insert({cast<DefinedRegular<ELFT>>(&Sym)->Section->OutSec, 0}); + auto *DefSym = cast<DefinedRegular>(&Sym); + PageIndexMap.insert({DefSym->Section->getOutputSection(), 0}); return; } if (Sym.isTls()) { @@ -483,7 +711,7 @@ void MipsGotSection<ELFT>::addEntry(SymbolBody &Sym, uintX_t Addend, TlsEntries.push_back(&Sym); return; } - auto AddEntry = [&](SymbolBody &S, uintX_t A, GotEntries &Items) { + auto AddEntry = [&](SymbolBody &S, uint64_t A, GotEntries &Items) { if (S.isInGot() && !A) return; size_t NewIndex = Items.size(); @@ -508,8 +736,7 @@ void MipsGotSection<ELFT>::addEntry(SymbolBody &Sym, uintX_t Addend, } } -template <class ELFT> -bool MipsGotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { +bool MipsGotSection::addDynTlsEntry(SymbolBody &Sym) { if (Sym.GlobalDynIndex != -1U) return false; Sym.GlobalDynIndex = TlsEntries.size(); @@ -521,10 +748,10 @@ bool MipsGotSection<ELFT>::addDynTlsEntry(SymbolBody &Sym) { // Reserves TLS entries for a TLS module ID and a TLS block offset. // In total it takes two GOT slots. 
-template <class ELFT> bool MipsGotSection<ELFT>::addTlsIndex() { +bool MipsGotSection::addTlsIndex() { if (TlsIndexOff != uint32_t(-1)) return false; - TlsIndexOff = TlsEntries.size() * sizeof(uintX_t); + TlsIndexOff = TlsEntries.size() * Config->Wordsize; TlsEntries.push_back(nullptr); TlsEntries.push_back(nullptr); return true; @@ -538,25 +765,21 @@ static uint64_t getMipsPageCount(uint64_t Size) { return (Size + 0xfffe) / 0xffff + 1; } -template <class ELFT> -typename MipsGotSection<ELFT>::uintX_t -MipsGotSection<ELFT>::getPageEntryOffset(const SymbolBody &B, - uintX_t Addend) const { - const OutputSectionBase *OutSec = - cast<DefinedRegular<ELFT>>(&B)->Section->OutSec; - uintX_t SecAddr = getMipsPageAddr(OutSec->Addr); - uintX_t SymAddr = getMipsPageAddr(B.getVA<ELFT>(Addend)); - uintX_t Index = PageIndexMap.lookup(OutSec) + (SymAddr - SecAddr) / 0xffff; +uint64_t MipsGotSection::getPageEntryOffset(const SymbolBody &B, + int64_t Addend) const { + const OutputSection *OutSec = + cast<DefinedRegular>(&B)->Section->getOutputSection(); + uint64_t SecAddr = getMipsPageAddr(OutSec->Addr); + uint64_t SymAddr = getMipsPageAddr(B.getVA(Addend)); + uint64_t Index = PageIndexMap.lookup(OutSec) + (SymAddr - SecAddr) / 0xffff; assert(Index < PageEntriesNum); - return (HeaderEntriesNum + Index) * sizeof(uintX_t); + return (HeaderEntriesNum + Index) * Config->Wordsize; } -template <class ELFT> -typename MipsGotSection<ELFT>::uintX_t -MipsGotSection<ELFT>::getBodyEntryOffset(const SymbolBody &B, - uintX_t Addend) const { +uint64_t MipsGotSection::getBodyEntryOffset(const SymbolBody &B, + int64_t Addend) const { // Calculate offset of the GOT entries block: TLS, global, local. 
- uintX_t Index = HeaderEntriesNum + PageEntriesNum; + uint64_t Index = HeaderEntriesNum + PageEntriesNum; if (B.isTls()) Index += LocalEntries.size() + LocalEntries32.size() + GlobalEntries.size(); else if (B.IsInGlobalMipsGot) @@ -571,35 +794,33 @@ MipsGotSection<ELFT>::getBodyEntryOffset(const SymbolBody &B, assert(It != EntryIndexMap.end()); Index += It->second; } - return Index * sizeof(uintX_t); + return Index * Config->Wordsize; } -template <class ELFT> -typename MipsGotSection<ELFT>::uintX_t -MipsGotSection<ELFT>::getTlsOffset() const { - return (getLocalEntriesNum() + GlobalEntries.size()) * sizeof(uintX_t); +uint64_t MipsGotSection::getTlsOffset() const { + return (getLocalEntriesNum() + GlobalEntries.size()) * Config->Wordsize; } -template <class ELFT> -typename MipsGotSection<ELFT>::uintX_t -MipsGotSection<ELFT>::getGlobalDynOffset(const SymbolBody &B) const { - return B.GlobalDynIndex * sizeof(uintX_t); +uint64_t MipsGotSection::getGlobalDynOffset(const SymbolBody &B) const { + return B.GlobalDynIndex * Config->Wordsize; } -template <class ELFT> -const SymbolBody *MipsGotSection<ELFT>::getFirstGlobalEntry() const { +const SymbolBody *MipsGotSection::getFirstGlobalEntry() const { return GlobalEntries.empty() ? 
nullptr : GlobalEntries.front().first; } -template <class ELFT> -unsigned MipsGotSection<ELFT>::getLocalEntriesNum() const { +unsigned MipsGotSection::getLocalEntriesNum() const { return HeaderEntriesNum + PageEntriesNum + LocalEntries.size() + LocalEntries32.size(); } -template <class ELFT> void MipsGotSection<ELFT>::finalize() { +void MipsGotSection::finalizeContents() { + updateAllocSize(); +} + +void MipsGotSection::updateAllocSize() { PageEntriesNum = 0; - for (std::pair<const OutputSectionBase *, size_t> &P : PageIndexMap) { + for (std::pair<const OutputSection *, size_t> &P : PageIndexMap) { // For each output section referenced by GOT page relocations calculate // and save into PageIndexMap an upper bound of MIPS GOT entries required // to store page addresses of local symbols. We assume the worst case - @@ -610,27 +831,33 @@ template <class ELFT> void MipsGotSection<ELFT>::finalize() { PageEntriesNum += getMipsPageCount(P.first->Size); } Size = (getLocalEntriesNum() + GlobalEntries.size() + TlsEntries.size()) * - sizeof(uintX_t); + Config->Wordsize; } -template <class ELFT> bool MipsGotSection<ELFT>::empty() const { +bool MipsGotSection::empty() const { // We add the .got section to the result for dynamic MIPS target because // its address and properties are mentioned in the .dynamic section. 
return Config->Relocatable; } -template <class ELFT> -typename MipsGotSection<ELFT>::uintX_t MipsGotSection<ELFT>::getGp() const { - return ElfSym<ELFT>::MipsGp->template getVA<ELFT>(0); +uint64_t MipsGotSection::getGp() const { + return ElfSym::MipsGp->getVA(0); } -template <class ELFT> -static void writeUint(uint8_t *Buf, typename ELFT::uint Val) { - typedef typename ELFT::uint uintX_t; - write<uintX_t, ELFT::TargetEndianness, sizeof(uintX_t)>(Buf, Val); +static uint64_t readUint(uint8_t *Buf) { + if (Config->Is64) + return read64(Buf, Config->Endianness); + return read32(Buf, Config->Endianness); +} + +static void writeUint(uint8_t *Buf, uint64_t Val) { + if (Config->Is64) + write64(Buf, Val, Config->Endianness); + else + write32(Buf, Val, Config->Endianness); } -template <class ELFT> void MipsGotSection<ELFT>::writeTo(uint8_t *Buf) { +void MipsGotSection::writeTo(uint8_t *Buf) { // Set the MSB of the second GOT slot. This is not required by any // MIPS ABI documentation, though. // @@ -645,25 +872,24 @@ template <class ELFT> void MipsGotSection<ELFT>::writeTo(uint8_t *Buf) { // we've been doing this for years, it is probably a safe bet to // keep doing this for now. We really need to revisit this to see // if we had to do this. - auto *P = reinterpret_cast<typename ELFT::Off *>(Buf); - P[1] = uintX_t(1) << (ELFT::Is64Bits ? 63 : 31); - Buf += HeaderEntriesNum * sizeof(uintX_t); + writeUint(Buf + Config->Wordsize, (uint64_t)1 << (Config->Wordsize * 8 - 1)); + Buf += HeaderEntriesNum * Config->Wordsize; // Write 'page address' entries to the local part of the GOT. 
- for (std::pair<const OutputSectionBase *, size_t> &L : PageIndexMap) { + for (std::pair<const OutputSection *, size_t> &L : PageIndexMap) { size_t PageCount = getMipsPageCount(L.first->Size); - uintX_t FirstPageAddr = getMipsPageAddr(L.first->Addr); + uint64_t FirstPageAddr = getMipsPageAddr(L.first->Addr); for (size_t PI = 0; PI < PageCount; ++PI) { - uint8_t *Entry = Buf + (L.second + PI) * sizeof(uintX_t); - writeUint<ELFT>(Entry, FirstPageAddr + PI * 0x10000); + uint8_t *Entry = Buf + (L.second + PI) * Config->Wordsize; + writeUint(Entry, FirstPageAddr + PI * 0x10000); } } - Buf += PageEntriesNum * sizeof(uintX_t); + Buf += PageEntriesNum * Config->Wordsize; auto AddEntry = [&](const GotEntry &SA) { uint8_t *Entry = Buf; - Buf += sizeof(uintX_t); + Buf += Config->Wordsize; const SymbolBody *Body = SA.first; - uintX_t VA = Body->template getVA<ELFT>(SA.second); - writeUint<ELFT>(Entry, VA); + uint64_t VA = Body->getVA(SA.second); + writeUint(Entry, VA); }; std::for_each(std::begin(LocalEntries), std::end(LocalEntries), AddEntry); std::for_each(std::begin(LocalEntries32), std::end(LocalEntries32), AddEntry); @@ -674,86 +900,83 @@ template <class ELFT> void MipsGotSection<ELFT>::writeTo(uint8_t *Buf) { // for thread-local storage. 
// https://www.linux-mips.org/wiki/NPTL if (TlsIndexOff != -1U && !Config->Pic) - writeUint<ELFT>(Buf + TlsIndexOff, 1); + writeUint(Buf + TlsIndexOff, 1); for (const SymbolBody *B : TlsEntries) { if (!B || B->isPreemptible()) continue; - uintX_t VA = B->getVA<ELFT>(); + uint64_t VA = B->getVA(); if (B->GotIndex != -1U) { - uint8_t *Entry = Buf + B->GotIndex * sizeof(uintX_t); - writeUint<ELFT>(Entry, VA - 0x7000); + uint8_t *Entry = Buf + B->GotIndex * Config->Wordsize; + writeUint(Entry, VA - 0x7000); } if (B->GlobalDynIndex != -1U) { - uint8_t *Entry = Buf + B->GlobalDynIndex * sizeof(uintX_t); - writeUint<ELFT>(Entry, 1); - Entry += sizeof(uintX_t); - writeUint<ELFT>(Entry, VA - 0x8000); + uint8_t *Entry = Buf + B->GlobalDynIndex * Config->Wordsize; + writeUint(Entry, 1); + Entry += Config->Wordsize; + writeUint(Entry, VA - 0x8000); } } } -template <class ELFT> -GotPltSection<ELFT>::GotPltSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - Target->GotPltEntrySize, ".got.plt") {} +GotPltSection::GotPltSection() + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotPltEntrySize, ".got.plt") {} -template <class ELFT> void GotPltSection<ELFT>::addEntry(SymbolBody &Sym) { +void GotPltSection::addEntry(SymbolBody &Sym) { Sym.GotPltIndex = Target->GotPltHeaderEntriesNum + Entries.size(); Entries.push_back(&Sym); } -template <class ELFT> size_t GotPltSection<ELFT>::getSize() const { +size_t GotPltSection::getSize() const { return (Target->GotPltHeaderEntriesNum + Entries.size()) * Target->GotPltEntrySize; } -template <class ELFT> void GotPltSection<ELFT>::writeTo(uint8_t *Buf) { +void GotPltSection::writeTo(uint8_t *Buf) { Target->writeGotPltHeader(Buf); Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize; for (const SymbolBody *B : Entries) { Target->writeGotPlt(Buf, *B); - Buf += sizeof(uintX_t); + Buf += Config->Wordsize; } } // On ARM the IgotPltSection is part of the GotSection, on other Targets it is // part of 
the .got.plt -template <class ELFT> -IgotPltSection<ELFT>::IgotPltSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - Target->GotPltEntrySize, - Config->EMachine == EM_ARM ? ".got" : ".got.plt") { -} +IgotPltSection::IgotPltSection() + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, + Target->GotPltEntrySize, + Config->EMachine == EM_ARM ? ".got" : ".got.plt") {} -template <class ELFT> void IgotPltSection<ELFT>::addEntry(SymbolBody &Sym) { +void IgotPltSection::addEntry(SymbolBody &Sym) { Sym.IsInIgot = true; Sym.GotPltIndex = Entries.size(); Entries.push_back(&Sym); } -template <class ELFT> size_t IgotPltSection<ELFT>::getSize() const { +size_t IgotPltSection::getSize() const { return Entries.size() * Target->GotPltEntrySize; } -template <class ELFT> void IgotPltSection<ELFT>::writeTo(uint8_t *Buf) { +void IgotPltSection::writeTo(uint8_t *Buf) { for (const SymbolBody *B : Entries) { Target->writeIgotPlt(Buf, *B); - Buf += sizeof(uintX_t); + Buf += Config->Wordsize; } } -template <class ELFT> -StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic) - : SyntheticSection<ELFT>(Dynamic ? (uintX_t)SHF_ALLOC : 0, SHT_STRTAB, 1, - Name), - Dynamic(Dynamic) {} +StringTableSection::StringTableSection(StringRef Name, bool Dynamic) + : SyntheticSection(Dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, Name), + Dynamic(Dynamic) { + // ELF string tables start with a NUL byte. + addString(""); +} // Adds a string to the string table. If HashIt is true we hash and check for // duplicates. It is optional because the name of global symbols are already // uniqued and hashing them again has a big cost for a small value: uniquing // them with some other string that happens to be the same. 
-template <class ELFT> -unsigned StringTableSection<ELFT>::addString(StringRef S, bool HashIt) { +unsigned StringTableSection::addString(StringRef S, bool HashIt) { if (HashIt) { auto R = StringMap.insert(std::make_pair(S, this->Size)); if (!R.second) @@ -765,9 +988,7 @@ unsigned StringTableSection<ELFT>::addString(StringRef S, bool HashIt) { return Ret; } -template <class ELFT> void StringTableSection<ELFT>::writeTo(uint8_t *Buf) { - // ELF string tables start with NUL byte, so advance the pointer by one. - ++Buf; +void StringTableSection::writeTo(uint8_t *Buf) { for (StringRef S : Strings) { memcpy(Buf, S.data(), S.size()); Buf += S.size() + 1; @@ -781,9 +1002,10 @@ static unsigned getVerDefNum() { return Config->VersionDefinitions.size() + 1; } template <class ELFT> DynamicSection<ELFT>::DynamicSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, - sizeof(uintX_t), ".dynamic") { + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, Config->Wordsize, + ".dynamic") { this->Entsize = ELFT::Is64Bits ? 16 : 8; + // .dynamic section is not writable on MIPS. // See "Special Section" in Chapter 4 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf @@ -816,6 +1038,8 @@ template <class ELFT> void DynamicSection<ELFT>::addEntries() { DtFlags |= DF_SYMBOLIC; if (Config->ZNodelete) DtFlags1 |= DF_1_NODELETE; + if (Config->ZNodlopen) + DtFlags1 |= DF_1_NOOPEN; if (Config->ZNow) { DtFlags |= DF_BIND_NOW; DtFlags1 |= DF_1_NOW; @@ -835,17 +1059,17 @@ template <class ELFT> void DynamicSection<ELFT>::addEntries() { } // Add remaining entries to complete .dynamic contents. -template <class ELFT> void DynamicSection<ELFT>::finalize() { +template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { if (this->Size) return; // Already finalized. this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; if (In<ELFT>::RelaDyn->OutSec->Size > 0) { - bool IsRela = Config->Rela; + bool IsRela = Config->IsRela; add({IsRela ? 
DT_RELA : DT_REL, In<ELFT>::RelaDyn}); add({IsRela ? DT_RELASZ : DT_RELSZ, In<ELFT>::RelaDyn->OutSec->Size}); add({IsRela ? DT_RELAENT : DT_RELENT, - uintX_t(IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel))}); + uint64_t(IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel))}); // MIPS dynamic loader does not support RELCOUNT tag. // The problem is in the tight relation between dynamic @@ -861,29 +1085,31 @@ template <class ELFT> void DynamicSection<ELFT>::finalize() { add({DT_PLTRELSZ, In<ELFT>::RelaPlt->OutSec->Size}); add({Config->EMachine == EM_MIPS ? DT_MIPS_PLTGOT : DT_PLTGOT, In<ELFT>::GotPlt}); - add({DT_PLTREL, uint64_t(Config->Rela ? DT_RELA : DT_REL)}); + add({DT_PLTREL, uint64_t(Config->IsRela ? DT_RELA : DT_REL)}); } add({DT_SYMTAB, In<ELFT>::DynSymTab}); add({DT_SYMENT, sizeof(Elf_Sym)}); add({DT_STRTAB, In<ELFT>::DynStrTab}); add({DT_STRSZ, In<ELFT>::DynStrTab->getSize()}); + if (!Config->ZText) + add({DT_TEXTREL, (uint64_t)0}); if (In<ELFT>::GnuHashTab) add({DT_GNU_HASH, In<ELFT>::GnuHashTab}); if (In<ELFT>::HashTab) add({DT_HASH, In<ELFT>::HashTab}); - if (Out<ELFT>::PreinitArray) { - add({DT_PREINIT_ARRAY, Out<ELFT>::PreinitArray}); - add({DT_PREINIT_ARRAYSZ, Out<ELFT>::PreinitArray, Entry::SecSize}); + if (Out::PreinitArray) { + add({DT_PREINIT_ARRAY, Out::PreinitArray}); + add({DT_PREINIT_ARRAYSZ, Out::PreinitArray, Entry::SecSize}); } - if (Out<ELFT>::InitArray) { - add({DT_INIT_ARRAY, Out<ELFT>::InitArray}); - add({DT_INIT_ARRAYSZ, Out<ELFT>::InitArray, Entry::SecSize}); + if (Out::InitArray) { + add({DT_INIT_ARRAY, Out::InitArray}); + add({DT_INIT_ARRAYSZ, Out::InitArray, Entry::SecSize}); } - if (Out<ELFT>::FiniArray) { - add({DT_FINI_ARRAY, Out<ELFT>::FiniArray}); - add({DT_FINI_ARRAYSZ, Out<ELFT>::FiniArray, Entry::SecSize}); + if (Out::FiniArray) { + add({DT_FINI_ARRAY, Out::FiniArray}); + add({DT_FINI_ARRAYSZ, Out::FiniArray, Entry::SecSize}); } if (SymbolBody *B = Symtab<ELFT>::X->findInCurrentDSO(Config->Init)) @@ -918,7 +1144,6 @@ template <class 
ELFT> void DynamicSection<ELFT>::finalize() { add({DT_MIPS_RLD_MAP, In<ELFT>::MipsRldMap}); } - this->OutSec->Entsize = this->Entsize; this->OutSec->Link = this->Link; // +1 for DT_NULL @@ -941,7 +1166,7 @@ template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) { P->d_un.d_val = E.OutSec->Size; break; case Entry::SymAddr: - P->d_un.d_ptr = E.Sym->template getVA<ELFT>(); + P->d_un.d_ptr = E.Sym->getVA(); break; case Entry::PlainInt: P->d_un.d_val = E.Val; @@ -951,21 +1176,17 @@ template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) { } } -template <class ELFT> -typename ELFT::uint DynamicReloc<ELFT>::getOffset() const { - if (OutputSec) - return OutputSec->Addr + OffsetInSec; +uint64_t DynamicReloc::getOffset() const { return InputSec->OutSec->Addr + InputSec->getOffset(OffsetInSec); } -template <class ELFT> -typename ELFT::uint DynamicReloc<ELFT>::getAddend() const { +int64_t DynamicReloc::getAddend() const { if (UseSymVA) - return Sym->getVA<ELFT>(Addend); + return Sym->getVA(Addend); return Addend; } -template <class ELFT> uint32_t DynamicReloc<ELFT>::getSymIndex() const { +uint32_t DynamicReloc::getSymIndex() const { if (Sym && !UseSymVA) return Sym->DynsymIndex; return 0; @@ -973,14 +1194,14 @@ template <class ELFT> uint32_t DynamicReloc<ELFT>::getSymIndex() const { template <class ELFT> RelocationSection<ELFT>::RelocationSection(StringRef Name, bool Sort) - : SyntheticSection<ELFT>(SHF_ALLOC, Config->Rela ? SHT_RELA : SHT_REL, - sizeof(uintX_t), Name), + : SyntheticSection(SHF_ALLOC, Config->IsRela ? SHT_RELA : SHT_REL, + Config->Wordsize, Name), Sort(Sort) { - this->Entsize = Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); + this->Entsize = Config->IsRela ? 
sizeof(Elf_Rela) : sizeof(Elf_Rel); } template <class ELFT> -void RelocationSection<ELFT>::addReloc(const DynamicReloc<ELFT> &Reloc) { +void RelocationSection<ELFT>::addReloc(const DynamicReloc &Reloc) { if (Reloc.Type == Target->RelativeRel) ++NumRelativeRelocs; Relocs.push_back(Reloc); @@ -988,21 +1209,21 @@ void RelocationSection<ELFT>::addReloc(const DynamicReloc<ELFT> &Reloc) { template <class ELFT, class RelTy> static bool compRelocations(const RelTy &A, const RelTy &B) { - bool AIsRel = A.getType(Config->Mips64EL) == Target->RelativeRel; - bool BIsRel = B.getType(Config->Mips64EL) == Target->RelativeRel; + bool AIsRel = A.getType(Config->IsMips64EL) == Target->RelativeRel; + bool BIsRel = B.getType(Config->IsMips64EL) == Target->RelativeRel; if (AIsRel != BIsRel) return AIsRel; - return A.getSymbol(Config->Mips64EL) < B.getSymbol(Config->Mips64EL); + return A.getSymbol(Config->IsMips64EL) < B.getSymbol(Config->IsMips64EL); } template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) { uint8_t *BufBegin = Buf; - for (const DynamicReloc<ELFT> &Rel : Relocs) { + for (const DynamicReloc &Rel : Relocs) { auto *P = reinterpret_cast<Elf_Rela *>(Buf); - Buf += Config->Rela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); + Buf += Config->IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); - if (Config->Rela) + if (Config->IsRela) P->r_addend = Rel.getAddend(); P->r_offset = Rel.getOffset(); if (Config->EMachine == EM_MIPS && Rel.getInputSec() == In<ELFT>::MipsGot) @@ -1010,11 +1231,11 @@ template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) { // allocated in the end of the GOT. We need to adjust the offset to take // in account 'local' and 'global' GOT entries. 
P->r_offset += In<ELFT>::MipsGot->getTlsOffset(); - P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->Mips64EL); + P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->IsMips64EL); } if (Sort) { - if (Config->Rela) + if (Config->IsRela) std::stable_sort((Elf_Rela *)BufBegin, (Elf_Rela *)BufBegin + Relocs.size(), compRelocations<ELFT, Elf_Rela>); @@ -1028,22 +1249,20 @@ template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() { return this->Entsize * Relocs.size(); } -template <class ELFT> void RelocationSection<ELFT>::finalize() { +template <class ELFT> void RelocationSection<ELFT>::finalizeContents() { this->Link = In<ELFT>::DynSymTab ? In<ELFT>::DynSymTab->OutSec->SectionIndex : In<ELFT>::SymTab->OutSec->SectionIndex; // Set required output section properties. this->OutSec->Link = this->Link; - this->OutSec->Entsize = this->Entsize; } template <class ELFT> -SymbolTableSection<ELFT>::SymbolTableSection( - StringTableSection<ELFT> &StrTabSec) - : SyntheticSection<ELFT>(StrTabSec.isDynamic() ? (uintX_t)SHF_ALLOC : 0, - StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, - sizeof(uintX_t), - StrTabSec.isDynamic() ? ".dynsym" : ".symtab"), +SymbolTableSection<ELFT>::SymbolTableSection(StringTableSection &StrTabSec) + : SyntheticSection(StrTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, + StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, + Config->Wordsize, + StrTabSec.isDynamic() ? ".dynsym" : ".symtab"), StrTabSec(StrTabSec) { this->Entsize = sizeof(Elf_Sym); } @@ -1053,289 +1272,257 @@ SymbolTableSection<ELFT>::SymbolTableSection( // See "Global Offset Table" in Chapter 5 in the following document // for detailed description: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf -static bool sortMipsSymbols(const SymbolBody *L, const SymbolBody *R) { +static bool sortMipsSymbols(const SymbolTableEntry &L, + const SymbolTableEntry &R) { // Sort entries related to non-local preemptible symbols by GOT indexes. 
// All other entries go to the first part of GOT in arbitrary order. - bool LIsInLocalGot = !L->IsInGlobalMipsGot; - bool RIsInLocalGot = !R->IsInGlobalMipsGot; + bool LIsInLocalGot = !L.Symbol->IsInGlobalMipsGot; + bool RIsInLocalGot = !R.Symbol->IsInGlobalMipsGot; if (LIsInLocalGot || RIsInLocalGot) return !RIsInLocalGot; - return L->GotIndex < R->GotIndex; -} - -template <class ELFT> void SymbolTableSection<ELFT>::finalize() { - this->OutSec->Link = this->Link = StrTabSec.OutSec->SectionIndex; - this->OutSec->Info = this->Info = NumLocals + 1; - this->OutSec->Entsize = this->Entsize; - - if (Config->Relocatable) - return; + return L.Symbol->GotIndex < R.Symbol->GotIndex; +} + +// Finalize a symbol table. The ELF spec requires that all local +// symbols precede global symbols, so we sort symbol entries in this +// function. (For .dynsym, we don't do that because symbols for +// dynamic linking are inherently all globals.) +template <class ELFT> void SymbolTableSection<ELFT>::finalizeContents() { + this->OutSec->Link = StrTabSec.OutSec->SectionIndex; + + // If it is a .dynsym, there should be no local symbols, but we need + // to do a few things for the dynamic linker. + if (this->Type == SHT_DYNSYM) { + // Section's Info field has the index of the first non-local symbol. + // Because the first symbol entry is a null entry, 1 is the first. + this->OutSec->Info = 1; + + if (In<ELFT>::GnuHashTab) { + // NB: It also sorts Symbols to meet the GNU hash table requirements. 
+ In<ELFT>::GnuHashTab->addSymbols(Symbols); + } else if (Config->EMachine == EM_MIPS) { + std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols); + } - if (!StrTabSec.isDynamic()) { - auto GlobBegin = Symbols.begin() + NumLocals; - auto It = std::stable_partition( - GlobBegin, Symbols.end(), [](const SymbolTableEntry &S) { - return S.Symbol->symbol()->computeBinding() == STB_LOCAL; - }); - // update sh_info with number of Global symbols output with computed - // binding of STB_LOCAL - this->OutSec->Info = this->Info = 1 + It - Symbols.begin(); + size_t I = 0; + for (const SymbolTableEntry &S : Symbols) + S.Symbol->DynsymIndex = ++I; return; } - - if (In<ELFT>::GnuHashTab) - // NB: It also sorts Symbols to meet the GNU hash table requirements. - In<ELFT>::GnuHashTab->addSymbols(Symbols); - else if (Config->EMachine == EM_MIPS) - std::stable_sort(Symbols.begin(), Symbols.end(), - [](const SymbolTableEntry &L, const SymbolTableEntry &R) { - return sortMipsSymbols(L.Symbol, R.Symbol); - }); - size_t I = 0; - for (const SymbolTableEntry &S : Symbols) - S.Symbol->DynsymIndex = ++I; } -template <class ELFT> void SymbolTableSection<ELFT>::addGlobal(SymbolBody *B) { - Symbols.push_back({B, StrTabSec.addString(B->getName(), false)}); +template <class ELFT> void SymbolTableSection<ELFT>::postThunkContents() { + if (this->Type == SHT_DYNSYM) + return; + // move all local symbols before global symbols. 
+ auto It = std::stable_partition( + Symbols.begin(), Symbols.end(), [](const SymbolTableEntry &S) { + return S.Symbol->isLocal() || + S.Symbol->symbol()->computeBinding() == STB_LOCAL; + }); + size_t NumLocals = It - Symbols.begin(); + this->OutSec->Info = NumLocals + 1; } -template <class ELFT> void SymbolTableSection<ELFT>::addLocal(SymbolBody *B) { - assert(!StrTabSec.isDynamic()); - ++NumLocals; - Symbols.push_back({B, StrTabSec.addString(B->getName())}); +template <class ELFT> void SymbolTableSection<ELFT>::addSymbol(SymbolBody *B) { + // Adding a local symbol to a .dynsym is a bug. + assert(this->Type != SHT_DYNSYM || !B->isLocal()); + + bool HashIt = B->isLocal(); + Symbols.push_back({B, StrTabSec.addString(B->getName(), HashIt)}); } template <class ELFT> size_t SymbolTableSection<ELFT>::getSymbolIndex(SymbolBody *Body) { - auto I = llvm::find_if( - Symbols, [&](const SymbolTableEntry &E) { return E.Symbol == Body; }); + auto I = llvm::find_if(Symbols, [&](const SymbolTableEntry &E) { + if (E.Symbol == Body) + return true; + // This is used for -r, so we have to handle multiple section + // symbols being combined. + if (Body->Type == STT_SECTION && E.Symbol->Type == STT_SECTION) + return cast<DefinedRegular>(Body)->Section->getOutputSection() == + cast<DefinedRegular>(E.Symbol)->Section->getOutputSection(); + return false; + }); if (I == Symbols.end()) return 0; return I - Symbols.begin() + 1; } +// Write the internal symbol table contents to the output symbol table. template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) { + // The first entry is a null entry as per the ELF spec. Buf += sizeof(Elf_Sym); - // All symbols with STB_LOCAL binding precede the weak and global symbols. - // .dynsym only contains global symbols. 
- if (Config->Discard != DiscardPolicy::All && !StrTabSec.isDynamic()) - writeLocalSymbols(Buf); - - writeGlobalSymbols(Buf); -} - -template <class ELFT> -void SymbolTableSection<ELFT>::writeLocalSymbols(uint8_t *&Buf) { - // Iterate over all input object files to copy their local symbols - // to the output symbol table pointed by Buf. + auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); - for (auto I = Symbols.begin(); I != Symbols.begin() + NumLocals; ++I) { - const DefinedRegular<ELFT> &Body = *cast<DefinedRegular<ELFT>>(I->Symbol); - InputSectionBase<ELFT> *Section = Body.Section; - auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); + for (SymbolTableEntry &Ent : Symbols) { + SymbolBody *Body = Ent.Symbol; - if (!Section) { - ESym->st_shndx = SHN_ABS; - ESym->st_value = Body.Value; + // Set st_info and st_other. + if (Body->isLocal()) { + ESym->setBindingAndType(STB_LOCAL, Body->Type); } else { - const OutputSectionBase *OutSec = Section->OutSec; - ESym->st_shndx = OutSec->SectionIndex; - ESym->st_value = OutSec->Addr + Section->getOffset(Body); + ESym->setBindingAndType(Body->symbol()->computeBinding(), Body->Type); + ESym->setVisibility(Body->symbol()->Visibility); } - ESym->st_name = I->StrTabOffset; - ESym->st_size = Body.template getSize<ELFT>(); - ESym->setBindingAndType(STB_LOCAL, Body.Type); - Buf += sizeof(*ESym); - } -} -template <class ELFT> -void SymbolTableSection<ELFT>::writeGlobalSymbols(uint8_t *Buf) { - // Write the internal symbol table contents to the output symbol table - // pointed by Buf. 
- auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); + ESym->st_name = Ent.StrTabOffset; + ESym->st_size = Body->getSize<ELFT>(); - for (auto I = Symbols.begin() + NumLocals; I != Symbols.end(); ++I) { - const SymbolTableEntry &S = *I; - SymbolBody *Body = S.Symbol; - size_t StrOff = S.StrTabOffset; - - uint8_t Type = Body->Type; - uintX_t Size = Body->getSize<ELFT>(); - - ESym->setBindingAndType(Body->symbol()->computeBinding(), Type); - ESym->st_size = Size; - ESym->st_name = StrOff; - ESym->setVisibility(Body->symbol()->Visibility); - ESym->st_value = Body->getVA<ELFT>(); - - if (const OutputSectionBase *OutSec = getOutputSection(Body)) { + // Set a section index. + if (const OutputSection *OutSec = Body->getOutputSection()) ESym->st_shndx = OutSec->SectionIndex; - } else if (isa<DefinedRegular<ELFT>>(Body)) { + else if (isa<DefinedRegular>(Body)) ESym->st_shndx = SHN_ABS; - } else if (isa<DefinedCommon>(Body)) { + else if (isa<DefinedCommon>(Body)) ESym->st_shndx = SHN_COMMON; + + // st_value is usually an address of a symbol, but that has a + // special meaning for uninstantiated common symbols (this can + // occur if -r is given). + if (!Config->DefineCommon && isa<DefinedCommon>(Body)) ESym->st_value = cast<DefinedCommon>(Body)->Alignment; - } + else + ESym->st_value = Body->getVA(); - if (Config->EMachine == EM_MIPS) { - // On MIPS we need to mark symbol which has a PLT entry and requires - // pointer equality by STO_MIPS_PLT flag. That is necessary to help - // dynamic linker distinguish such symbols and MIPS lazy-binding stubs. 
- // https://sourceware.org/ml/binutils/2008-07/txt00000.txt - if (Body->isInPlt() && Body->NeedsCopyOrPltAddr) - ESym->st_other |= STO_MIPS_PLT; - if (Config->Relocatable) { - auto *D = dyn_cast<DefinedRegular<ELFT>>(Body); - if (D && D->isMipsPIC()) - ESym->st_other |= STO_MIPS_PIC; - } - } ++ESym; } -} -template <class ELFT> -const OutputSectionBase * -SymbolTableSection<ELFT>::getOutputSection(SymbolBody *Sym) { - switch (Sym->kind()) { - case SymbolBody::DefinedSyntheticKind: - return cast<DefinedSynthetic>(Sym)->Section; - case SymbolBody::DefinedRegularKind: { - auto &D = cast<DefinedRegular<ELFT>>(*Sym); - if (D.Section) - return D.Section->OutSec; - break; - } - case SymbolBody::DefinedCommonKind: - if (!Config->DefineCommon) - return nullptr; - return In<ELFT>::Common->OutSec; - case SymbolBody::SharedKind: { - auto &SS = cast<SharedSymbol<ELFT>>(*Sym); - if (SS.needsCopy()) - return SS.getBssSectionForCopy(); - break; - } - case SymbolBody::UndefinedKind: - case SymbolBody::LazyArchiveKind: - case SymbolBody::LazyObjectKind: - break; + // On MIPS we need to mark symbol which has a PLT entry and requires + // pointer equality by STO_MIPS_PLT flag. That is necessary to help + // dynamic linker distinguish such symbols and MIPS lazy-binding stubs. + // https://sourceware.org/ml/binutils/2008-07/txt00000.txt + if (Config->EMachine == EM_MIPS) { + auto *ESym = reinterpret_cast<Elf_Sym *>(Buf); + + for (SymbolTableEntry &Ent : Symbols) { + SymbolBody *Body = Ent.Symbol; + if (Body->isInPlt() && Body->NeedsPltAddr) + ESym->st_other |= STO_MIPS_PLT; + + if (Config->Relocatable) + if (auto *D = dyn_cast<DefinedRegular>(Body)) + if (D->isMipsPIC<ELFT>()) + ESym->st_other |= STO_MIPS_PIC; + ++ESym; + } } - return nullptr; } +// .hash and .gnu.hash sections contain on-disk hash tables that map +// symbol names to their dynamic symbol table indices. Their purpose +// is to help the dynamic linker resolve symbols quickly. 
If ELF files +// don't have them, the dynamic linker has to do linear search on all +// dynamic symbols, which makes programs slower. Therefore, a .hash +// section is added to a DSO by default. A .gnu.hash is added if you +// give the -hash-style=gnu or -hash-style=both option. +// +// The Unix semantics of resolving dynamic symbols is somewhat expensive. +// Each ELF file has a list of DSOs that the ELF file depends on and a +// list of dynamic symbols that need to be resolved from any of the +// DSOs. That means resolving all dynamic symbols takes O(m)*O(n) +// where m is the number of DSOs and n is the number of dynamic +// symbols. For modern large programs, both m and n are large. So +// making each step faster by using hash tables substantially +// improves time to load programs. +// +// (Note that this is not the only way to design the shared library. +// For instance, the Windows DLL takes a different approach. On +// Windows, each dynamic symbol has a name of DLL from which the symbol +// has to be resolved. That makes the cost of symbol resolution O(n). +// This disables some hacky techniques you can use on Unix such as +// LD_PRELOAD, but this is arguably better semantics than the Unix ones.) +// +// Due to historical reasons, we have two different hash tables, .hash +// and .gnu.hash. They are for the same purpose, and .gnu.hash is a new +// and better version of .hash. .hash is just an on-disk hash table, but +// .gnu.hash has a bloom filter in addition to a hash table to skip +// DSOs very quickly. If you are sure that your dynamic linker knows +// about .gnu.hash, you want to specify -hash-style=gnu. Otherwise, a +// safe bet is to specify -hash-style=both for backward compatibility. template <class ELFT> GnuHashTableSection<ELFT>::GnuHashTableSection() - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_HASH, sizeof(uintX_t), - ".gnu.hash") { - this->Entsize = ELFT::Is64Bits ? 
0 : 4; + : SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, Config->Wordsize, ".gnu.hash") { } -template <class ELFT> -unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) { - if (!NumHashed) - return 0; - - // These values are prime numbers which are not greater than 2^(N-1) + 1. - // In result, for any particular NumHashed we return a prime number - // which is not greater than NumHashed. - static const unsigned Primes[] = { - 1, 1, 3, 3, 7, 13, 31, 61, 127, 251, - 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071}; - - return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed), - array_lengthof(Primes) - 1)]; -} - -// Bloom filter estimation: at least 8 bits for each hashed symbol. -// GNU Hash table requirement: it should be a power of 2, -// the minimum value is 1, even for an empty table. -// Expected results for a 32-bit target: -// calcMaskWords(0..4) = 1 -// calcMaskWords(5..8) = 2 -// calcMaskWords(9..16) = 4 -// For a 64-bit target: -// calcMaskWords(0..8) = 1 -// calcMaskWords(9..16) = 2 -// calcMaskWords(17..32) = 4 -template <class ELFT> -unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) { - if (!NumHashed) - return 1; - return NextPowerOf2((NumHashed - 1) / sizeof(Elf_Off)); -} +template <class ELFT> void GnuHashTableSection<ELFT>::finalizeContents() { + this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; -template <class ELFT> void GnuHashTableSection<ELFT>::finalize() { - unsigned NumHashed = Symbols.size(); - NBuckets = calcNBuckets(NumHashed); - MaskWords = calcMaskWords(NumHashed); - // Second hash shift estimation: just predefined values. - Shift2 = ELFT::Is64Bits ? 6 : 5; + // Computes bloom filter size in word size. We want to allocate 8 + // bits for each symbol. It must be a power of two. 
+ if (Symbols.empty()) + MaskWords = 1; + else + MaskWords = NextPowerOf2((Symbols.size() - 1) / Config->Wordsize); - this->OutSec->Entsize = this->Entsize; - this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; - this->Size = sizeof(Elf_Word) * 4 // Header - + sizeof(Elf_Off) * MaskWords // Bloom Filter - + sizeof(Elf_Word) * NBuckets // Hash Buckets - + sizeof(Elf_Word) * NumHashed; // Hash Values + Size = 16; // Header + Size += Config->Wordsize * MaskWords; // Bloom filter + Size += NBuckets * 4; // Hash buckets + Size += Symbols.size() * 4; // Hash values } -template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) { - writeHeader(Buf); - if (Symbols.empty()) - return; +template <class ELFT> +void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) { + // Write a header. + write32(Buf, NBuckets, Config->Endianness); + write32(Buf + 4, In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size(), + Config->Endianness); + write32(Buf + 8, MaskWords, Config->Endianness); + write32(Buf + 12, getShift2(), Config->Endianness); + Buf += 16; + + // Write a bloom filter and a hash table. writeBloomFilter(Buf); + Buf += Config->Wordsize * MaskWords; writeHashTable(Buf); } +// This function writes a 2-bit bloom filter. This bloom filter alone +// usually filters out 80% or more of all symbol lookups [1]. +// The dynamic linker uses the hash table only when a symbol is not +// filtered out by a bloom filter. +// +// [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 
4.1.2), +// p.9, https://www.akkadia.org/drepper/dsohowto.pdf template <class ELFT> -void GnuHashTableSection<ELFT>::writeHeader(uint8_t *&Buf) { - auto *P = reinterpret_cast<Elf_Word *>(Buf); - *P++ = NBuckets; - *P++ = In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size(); - *P++ = MaskWords; - *P++ = Shift2; - Buf = reinterpret_cast<uint8_t *>(P); -} - -template <class ELFT> -void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *&Buf) { - unsigned C = sizeof(Elf_Off) * 8; - - auto *Masks = reinterpret_cast<Elf_Off *>(Buf); - for (const SymbolData &Sym : Symbols) { - size_t Pos = (Sym.Hash / C) & (MaskWords - 1); - uintX_t V = (uintX_t(1) << (Sym.Hash % C)) | - (uintX_t(1) << ((Sym.Hash >> Shift2) % C)); - Masks[Pos] |= V; +void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) { + const unsigned C = Config->Wordsize * 8; + for (const Entry &Sym : Symbols) { + size_t I = (Sym.Hash / C) & (MaskWords - 1); + uint64_t Val = readUint(Buf + I * Config->Wordsize); + Val |= uint64_t(1) << (Sym.Hash % C); + Val |= uint64_t(1) << ((Sym.Hash >> getShift2()) % C); + writeUint(Buf + I * Config->Wordsize, Val); } - Buf += sizeof(Elf_Off) * MaskWords; } template <class ELFT> void GnuHashTableSection<ELFT>::writeHashTable(uint8_t *Buf) { - Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf); - Elf_Word *Values = Buckets + NBuckets; - - int PrevBucket = -1; - int I = 0; - for (const SymbolData &Sym : Symbols) { - int Bucket = Sym.Hash % NBuckets; - assert(PrevBucket <= Bucket); - if (Bucket != PrevBucket) { - Buckets[Bucket] = Sym.Body->DynsymIndex; - PrevBucket = Bucket; - if (I > 0) - Values[I - 1] |= 1; - } - Values[I] = Sym.Hash & ~1; - ++I; + // Group symbols by hash value. + std::vector<std::vector<Entry>> Syms(NBuckets); + for (const Entry &Ent : Symbols) + Syms[Ent.Hash % NBuckets].push_back(Ent); + + // Write hash buckets. Hash buckets contain indices in the following + // hash value table. 
+ uint32_t *Buckets = reinterpret_cast<uint32_t *>(Buf); + for (size_t I = 0; I < NBuckets; ++I) + if (!Syms[I].empty()) + write32(Buckets + I, Syms[I][0].Body->DynsymIndex, Config->Endianness); + + // Write a hash value table. It represents a sequence of chains that + // share the same hash modulo value. The last element of each chain + // is terminated by LSB 1. + uint32_t *Values = Buckets + NBuckets; + size_t I = 0; + for (std::vector<Entry> &Vec : Syms) { + if (Vec.empty()) + continue; + for (const Entry &Ent : makeArrayRef(Vec).drop_back()) + write32(Values + I++, Ent.Hash & ~1, Config->Endianness); + write32(Values + I++, Vec.back().Hash | 1, Config->Endianness); } - if (I > 0) - Values[I - 1] |= 1; } static uint32_t hashGnu(StringRef Name) { @@ -1345,45 +1532,60 @@ static uint32_t hashGnu(StringRef Name) { return H; } +// Returns a number of hash buckets to accommodate given number of elements. +// We want to choose a moderate number that is not too small (which +// causes too many hash collisions) and not too large (which wastes +// disk space.) +// +// We return a prime number because it (is believed to) achieve good +// hash distribution. +static size_t getBucketSize(size_t NumSymbols) { + // List of largest prime numbers that are not greater than 2^n + 1. + for (size_t N : {131071, 65521, 32749, 16381, 8191, 4093, 2039, 1021, 509, + 251, 127, 61, 31, 13, 7, 3, 1}) + if (N <= NumSymbols) + return N; + return 0; +} + // Add symbols to this symbol hash table. Note that this function // destructively sort a given vector -- which is needed because // GNU-style hash table places some sorting requirements. template <class ELFT> void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) { - // Ideally this will just be 'auto' but GCC 6.1 is not able - // to deduce it correctly. + // We cannot use 'auto' for Mid because GCC 6.1 cannot deduce + // its type correctly. 
std::vector<SymbolTableEntry>::iterator Mid = std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) { return S.Symbol->isUndefined(); }); if (Mid == V.end()) return; - for (auto I = Mid, E = V.end(); I != E; ++I) { - SymbolBody *B = I->Symbol; - size_t StrOff = I->StrTabOffset; - Symbols.push_back({B, StrOff, hashGnu(B->getName())}); + + for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) { + SymbolBody *B = Ent.Symbol; + Symbols.push_back({B, Ent.StrTabOffset, hashGnu(B->getName())}); } - unsigned NBuckets = calcNBuckets(Symbols.size()); + NBuckets = getBucketSize(Symbols.size()); std::stable_sort(Symbols.begin(), Symbols.end(), - [&](const SymbolData &L, const SymbolData &R) { + [&](const Entry &L, const Entry &R) { return L.Hash % NBuckets < R.Hash % NBuckets; }); V.erase(Mid, V.end()); - for (const SymbolData &Sym : Symbols) - V.push_back({Sym.Body, Sym.STName}); + for (const Entry &Ent : Symbols) + V.push_back({Ent.Body, Ent.StrTabOffset}); } template <class ELFT> HashTableSection<ELFT>::HashTableSection() - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_HASH, sizeof(Elf_Word), ".hash") { - this->Entsize = sizeof(Elf_Word); + : SyntheticSection(SHF_ALLOC, SHT_HASH, 4, ".hash") { + this->Entsize = 4; } -template <class ELFT> void HashTableSection<ELFT>::finalize() { - this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; - this->OutSec->Entsize = this->Entsize; +template <class ELFT> void HashTableSection<ELFT>::finalizeContents() { + this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; unsigned NumEntries = 2; // nbucket and nchain. NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); // The chain entries. @@ -1392,11 +1594,15 @@ template <class ELFT> void HashTableSection<ELFT>::finalize() { // FIXME: This is simplistic. We can try to optimize it, but implementing // support for SHT_GNU_HASH is probably even more profitable. 
NumEntries += In<ELFT>::DynSymTab->getNumSymbols(); - this->Size = NumEntries * sizeof(Elf_Word); + this->Size = NumEntries * 4; } template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) { + // A 32-bit integer type in the target endianness. + typedef typename ELFT::Word Elf_Word; + unsigned NumSymbols = In<ELFT>::DynSymTab->getNumSymbols(); + auto *P = reinterpret_cast<Elf_Word *>(Buf); *P++ = NumSymbols; // nbucket *P++ = NumSymbols; // nchain @@ -1414,79 +1620,65 @@ template <class ELFT> void HashTableSection<ELFT>::writeTo(uint8_t *Buf) { } } -template <class ELFT> -PltSection<ELFT>::PltSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".plt") {} +PltSection::PltSection(size_t S) + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"), + HeaderSize(S) {} -template <class ELFT> void PltSection<ELFT>::writeTo(uint8_t *Buf) { - // At beginning of PLT, we have code to call the dynamic linker - // to resolve dynsyms at runtime. Write such code. - Target->writePltHeader(Buf); - size_t Off = Target->PltHeaderSize; +void PltSection::writeTo(uint8_t *Buf) { + // At beginning of PLT but not the IPLT, we have code to call the dynamic + // linker to resolve dynsyms at runtime. Write such code. 
+ if (HeaderSize != 0) + Target->writePltHeader(Buf); + size_t Off = HeaderSize; + // The IPlt is immediately after the Plt, account for this in RelOff + unsigned PltOff = getPltRelocOff(); for (auto &I : Entries) { const SymbolBody *B = I.first; - unsigned RelOff = I.second; - uint64_t Got = B->getGotPltVA<ELFT>(); + unsigned RelOff = I.second + PltOff; + uint64_t Got = B->getGotPltVA(); uint64_t Plt = this->getVA() + Off; Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); Off += Target->PltEntrySize; } } -template <class ELFT> void PltSection<ELFT>::addEntry(SymbolBody &Sym) { +template <class ELFT> void PltSection::addEntry(SymbolBody &Sym) { Sym.PltIndex = Entries.size(); - unsigned RelOff = In<ELFT>::RelaPlt->getRelocOffset(); + RelocationSection<ELFT> *PltRelocSection = In<ELFT>::RelaPlt; + if (HeaderSize == 0) { + PltRelocSection = In<ELFT>::RelaIplt; + Sym.IsInIplt = true; + } + unsigned RelOff = PltRelocSection->getRelocOffset(); Entries.push_back(std::make_pair(&Sym, RelOff)); } -template <class ELFT> size_t PltSection<ELFT>::getSize() const { - return Target->PltHeaderSize + Entries.size() * Target->PltEntrySize; +size_t PltSection::getSize() const { + return HeaderSize + Entries.size() * Target->PltEntrySize; } -template <class ELFT> -IpltSection<ELFT>::IpltSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - ".plt") {} - -template <class ELFT> void IpltSection<ELFT>::writeTo(uint8_t *Buf) { - // The IRelative relocations do not support lazy binding so no header is - // needed - size_t Off = 0; - for (auto &I : Entries) { - const SymbolBody *B = I.first; - unsigned RelOff = I.second + In<ELFT>::Plt->getSize(); - uint64_t Got = B->getGotPltVA<ELFT>(); - uint64_t Plt = this->getVA() + Off; - Target->writePlt(Buf + Off, Got, Plt, B->PltIndex, RelOff); +// Some architectures require additional symbols in the PLT section. 
For +// example ARM uses mapping symbols to aid disassembly +void PltSection::addSymbols() { + // The PLT may have symbols defined for the Header, the IPLT has no header + if (HeaderSize != 0) + Target->addPltHeaderSymbols(this); + size_t Off = HeaderSize; + for (size_t I = 0; I < Entries.size(); ++I) { + Target->addPltSymbols(this, Off); Off += Target->PltEntrySize; } } -template <class ELFT> void IpltSection<ELFT>::addEntry(SymbolBody &Sym) { - Sym.PltIndex = Entries.size(); - Sym.IsInIplt = true; - unsigned RelOff = In<ELFT>::RelaIplt->getRelocOffset(); - Entries.push_back(std::make_pair(&Sym, RelOff)); -} - -template <class ELFT> size_t IpltSection<ELFT>::getSize() const { - return Entries.size() * Target->PltEntrySize; +unsigned PltSection::getPltRelocOff() const { + return (HeaderSize == 0) ? InX::Plt->getSize() : 0; } -template <class ELFT> -GdbIndexSection<ELFT>::GdbIndexSection() - : SyntheticSection<ELFT>(0, SHT_PROGBITS, 1, ".gdb_index"), +GdbIndexSection::GdbIndexSection() + : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), StringPool(llvm::StringTableBuilder::ELF) {} -template <class ELFT> void GdbIndexSection<ELFT>::parseDebugSections() { - for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) - if (InputSection<ELFT> *IS = dyn_cast<InputSection<ELFT>>(S)) - if (IS->OutSec && IS->Name == ".debug_info") - readDwarf(IS); -} - // Iterative hash function for symbol's name is described in .gdb_index format // specification. Note that we use one for version 5 to 7 here, it is different // for version 4. 
@@ -1497,21 +1689,88 @@ static uint32_t hash(StringRef Str) { return R; } -template <class ELFT> -void GdbIndexSection<ELFT>::readDwarf(InputSection<ELFT> *I) { - GdbIndexBuilder<ELFT> Builder(I); - if (ErrorCount) +static std::vector<std::pair<uint64_t, uint64_t>> +readCuList(DWARFContext &Dwarf, InputSection *Sec) { + std::vector<std::pair<uint64_t, uint64_t>> Ret; + for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf.compile_units()) + Ret.push_back({Sec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); + return Ret; +} + +static InputSectionBase *findSection(ArrayRef<InputSectionBase *> Arr, + uint64_t Offset) { + for (InputSectionBase *S : Arr) + if (S && S != &InputSection::Discarded) + if (Offset >= S->getOffsetInFile() && + Offset < S->getOffsetInFile() + S->getSize()) + return S; + return nullptr; +} + +static std::vector<AddressEntry> +readAddressArea(DWARFContext &Dwarf, InputSection *Sec, size_t CurrentCU) { + std::vector<AddressEntry> Ret; + + for (std::unique_ptr<DWARFCompileUnit> &CU : Dwarf.compile_units()) { + DWARFAddressRangesVector Ranges; + CU->collectAddressRanges(Ranges); + + ArrayRef<InputSectionBase *> Sections = Sec->File->getSections(); + for (std::pair<uint64_t, uint64_t> &R : Ranges) + if (InputSectionBase *S = findSection(Sections, R.first)) + Ret.push_back({S, R.first - S->getOffsetInFile(), + R.second - S->getOffsetInFile(), CurrentCU}); + ++CurrentCU; + } + return Ret; +} + +static std::vector<std::pair<StringRef, uint8_t>> +readPubNamesAndTypes(DWARFContext &Dwarf, bool IsLE) { + StringRef Data[] = {Dwarf.getGnuPubNamesSection(), + Dwarf.getGnuPubTypesSection()}; + + std::vector<std::pair<StringRef, uint8_t>> Ret; + for (StringRef D : Data) { + DWARFDebugPubTable PubTable(D, IsLE, true); + for (const DWARFDebugPubTable::Set &Set : PubTable.getData()) + for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) + Ret.push_back({Ent.Name, Ent.Descriptor.toBits()}); + } + return Ret; +} + +class ObjInfoTy : public 
llvm::LoadedObjectInfo { + uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const override { + auto &S = static_cast<const object::ELFSectionRef &>(Sec); + if (S.getFlags() & ELF::SHF_ALLOC) + return S.getOffset(); + return 0; + } + + std::unique_ptr<llvm::LoadedObjectInfo> clone() const override { return {}; } +}; + +void GdbIndexSection::readDwarf(InputSection *Sec) { + Expected<std::unique_ptr<object::ObjectFile>> Obj = + object::ObjectFile::createObjectFile(Sec->File->MB); + if (!Obj) { + error(toString(Sec->File) + ": error creating DWARF context"); return; + } + + ObjInfoTy ObjInfo; + DWARFContextInMemory Dwarf(*Obj.get(), &ObjInfo); size_t CuId = CompilationUnits.size(); - std::vector<std::pair<uintX_t, uintX_t>> CuList = Builder.readCUList(); - CompilationUnits.insert(CompilationUnits.end(), CuList.begin(), CuList.end()); + for (std::pair<uint64_t, uint64_t> &P : readCuList(Dwarf, Sec)) + CompilationUnits.push_back(P); - std::vector<AddressEntry<ELFT>> AddrArea = Builder.readAddressArea(CuId); - AddressArea.insert(AddressArea.end(), AddrArea.begin(), AddrArea.end()); + for (AddressEntry &Ent : readAddressArea(Dwarf, Sec, CuId)) + AddressArea.push_back(Ent); std::vector<std::pair<StringRef, uint8_t>> NamesAndTypes = - Builder.readPubNamesAndTypes(); + readPubNamesAndTypes(Dwarf, Config->IsLE); for (std::pair<StringRef, uint8_t> &Pair : NamesAndTypes) { uint32_t Hash = hash(Pair.first); @@ -1526,18 +1785,21 @@ void GdbIndexSection<ELFT>::readDwarf(InputSection<ELFT> *I) { continue; } - std::vector<std::pair<uint32_t, uint8_t>> &CuVec = - CuVectors[Sym->CuVectorIndex]; - CuVec.push_back({CuId, Pair.second}); + CuVectors[Sym->CuVectorIndex].push_back({CuId, Pair.second}); } } -template <class ELFT> void GdbIndexSection<ELFT>::finalize() { +void GdbIndexSection::finalizeContents() { if (Finalized) return; Finalized = true; - parseDebugSections(); + for (InputSectionBase *S : InputSections) + if (InputSection *IS = dyn_cast<InputSection>(S)) + if 
(IS->OutSec && IS->Name == ".debug_info") + readDwarf(IS); + + SymbolTable.finalizeContents(); // GdbIndex header consist from version fields // and 5 more fields with different kinds of offsets. @@ -1556,12 +1818,12 @@ template <class ELFT> void GdbIndexSection<ELFT>::finalize() { StringPool.finalizeInOrder(); } -template <class ELFT> size_t GdbIndexSection<ELFT>::getSize() const { - const_cast<GdbIndexSection<ELFT> *>(this)->finalize(); +size_t GdbIndexSection::getSize() const { + const_cast<GdbIndexSection *>(this)->finalizeContents(); return StringPoolOffset + StringPool.getSize(); } -template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) { +void GdbIndexSection::writeTo(uint8_t *Buf) { write32le(Buf, 7); // Write version. write32le(Buf + 4, CuListOffset); // CU list offset. write32le(Buf + 8, CuTypesOffset); // Types CU list offset. @@ -1571,15 +1833,15 @@ template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) { Buf += 24; // Write the CU list. - for (std::pair<uintX_t, uintX_t> CU : CompilationUnits) { + for (std::pair<uint64_t, uint64_t> CU : CompilationUnits) { write64le(Buf, CU.first); write64le(Buf + 8, CU.second); Buf += 16; } // Write the address area. 
- for (AddressEntry<ELFT> &E : AddressArea) { - uintX_t BaseAddr = E.Section->OutSec->Addr + E.Section->getOffset(0); + for (AddressEntry &E : AddressArea) { + uint64_t BaseAddr = E.Section->OutSec->Addr + E.Section->getOffset(0); write64le(Buf, BaseAddr + E.LowAddress); write64le(Buf + 8, BaseAddr + E.HighAddress); write32le(Buf + 16, E.CuIndex); @@ -1615,13 +1877,13 @@ template <class ELFT> void GdbIndexSection<ELFT>::writeTo(uint8_t *Buf) { StringPool.write(Buf); } -template <class ELFT> bool GdbIndexSection<ELFT>::empty() const { - return !Out<ELFT>::DebugInfo; +bool GdbIndexSection::empty() const { + return !Out::DebugInfo; } template <class ELFT> EhFrameHeader<ELFT>::EhFrameHeader() - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame_hdr") {} + : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame_hdr") {} // .eh_frame_hdr contains a binary search table of pointers to FDEs. // Each entry of the search table consists of two values, @@ -1642,11 +1904,11 @@ template <class ELFT> void EhFrameHeader<ELFT>::writeTo(uint8_t *Buf) { Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; Buf[2] = DW_EH_PE_udata4; Buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; - write32<E>(Buf + 4, Out<ELFT>::EhFrame->Addr - this->getVA() - 4); + write32<E>(Buf + 4, In<ELFT>::EhFrame->OutSec->Addr - this->getVA() - 4); write32<E>(Buf + 8, Fdes.size()); Buf += 12; - uintX_t VA = this->getVA(); + uint64_t VA = this->getVA(); for (FdeData &Fde : Fdes) { write32<E>(Buf, Fde.Pc - VA); write32<E>(Buf + 4, Fde.FdeVA - VA); @@ -1656,7 +1918,7 @@ template <class ELFT> void EhFrameHeader<ELFT>::writeTo(uint8_t *Buf) { template <class ELFT> size_t EhFrameHeader<ELFT>::getSize() const { // .eh_frame_hdr has a 12 bytes header followed by an array of FDEs. 
- return 12 + Out<ELFT>::EhFrame->NumFdes * 8; + return 12 + In<ELFT>::EhFrame->NumFdes * 8; } template <class ELFT> @@ -1665,13 +1927,13 @@ void EhFrameHeader<ELFT>::addFde(uint32_t Pc, uint32_t FdeVA) { } template <class ELFT> bool EhFrameHeader<ELFT>::empty() const { - return Out<ELFT>::EhFrame->empty(); + return In<ELFT>::EhFrame->empty(); } template <class ELFT> VersionDefinitionSection<ELFT>::VersionDefinitionSection() - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), - ".gnu.version_d") {} + : SyntheticSection(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t), + ".gnu.version_d") {} static StringRef getFileDefName() { if (!Config->SoName.empty()) @@ -1679,17 +1941,17 @@ static StringRef getFileDefName() { return Config->OutputFile; } -template <class ELFT> void VersionDefinitionSection<ELFT>::finalize() { +template <class ELFT> void VersionDefinitionSection<ELFT>::finalizeContents() { FileDefNameOff = In<ELFT>::DynStrTab->addString(getFileDefName()); for (VersionDefinition &V : Config->VersionDefinitions) V.NameOff = In<ELFT>::DynStrTab->addString(V.Name); - this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + this->OutSec->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; // sh_info should be set to the number of definitions. 
This fact is missed in // documentation, but confirmed by binutils community: // https://sourceware.org/ml/binutils/2014-11/msg00355.html - this->OutSec->Info = this->Info = getVerDefNum(); + this->OutSec->Info = getVerDefNum(); } template <class ELFT> @@ -1729,14 +1991,15 @@ template <class ELFT> size_t VersionDefinitionSection<ELFT>::getSize() const { template <class ELFT> VersionTableSection<ELFT>::VersionTableSection() - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), - ".gnu.version") {} + : SyntheticSection(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t), + ".gnu.version") { + this->Entsize = sizeof(Elf_Versym); +} -template <class ELFT> void VersionTableSection<ELFT>::finalize() { - this->OutSec->Entsize = this->Entsize = sizeof(Elf_Versym); +template <class ELFT> void VersionTableSection<ELFT>::finalizeContents() { // At the moment of june 2016 GNU docs does not mention that sh_link field // should be set, but Sun docs do. Also readelf relies on this field. - this->OutSec->Link = this->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; + this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex; } template <class ELFT> size_t VersionTableSection<ELFT>::getSize() const { @@ -1757,8 +2020,8 @@ template <class ELFT> bool VersionTableSection<ELFT>::empty() const { template <class ELFT> VersionNeedSection<ELFT>::VersionNeedSection() - : SyntheticSection<ELFT>(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), - ".gnu.version_r") { + : SyntheticSection(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t), + ".gnu.version_r") { // Identifiers in verneed section start at 2 because 0 and 1 are reserved // for VER_NDX_LOCAL and VER_NDX_GLOBAL. // First identifiers are reserved by verdef section if it exist. 
@@ -1766,24 +2029,27 @@ VersionNeedSection<ELFT>::VersionNeedSection() } template <class ELFT> -void VersionNeedSection<ELFT>::addSymbol(SharedSymbol<ELFT> *SS) { - if (!SS->Verdef) { +void VersionNeedSection<ELFT>::addSymbol(SharedSymbol *SS) { + auto *Ver = reinterpret_cast<const typename ELFT::Verdef *>(SS->Verdef); + if (!Ver) { SS->symbol()->VersionId = VER_NDX_GLOBAL; return; } - SharedFile<ELFT> *F = SS->file(); + + auto *File = cast<SharedFile<ELFT>>(SS->File); + // If we don't already know that we need an Elf_Verneed for this DSO, prepare // to create one by adding it to our needed list and creating a dynstr entry // for the soname. - if (F->VerdefMap.empty()) - Needed.push_back({F, In<ELFT>::DynStrTab->addString(F->getSoName())}); - typename SharedFile<ELFT>::NeededVer &NV = F->VerdefMap[SS->Verdef]; + if (File->VerdefMap.empty()) + Needed.push_back({File, In<ELFT>::DynStrTab->addString(File->getSoName())}); + typename SharedFile<ELFT>::NeededVer &NV = File->VerdefMap[Ver]; // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef, // prepare to create one by allocating a version identifier and creating a // dynstr entry for the version name. 
if (NV.Index == 0) { - NV.StrTab = In<ELFT>::DynStrTab->addString( - SS->file()->getStringTable().data() + SS->Verdef->getAux()->vda_name); + NV.StrTab = In<ELFT>::DynStrTab->addString(File->getStringTable().data() + + Ver->getAux()->vda_name); NV.Index = NextIndex++; } SS->symbol()->VersionId = NV.Index; @@ -1824,9 +2090,9 @@ template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *Buf) { Verneed[-1].vn_next = 0; } -template <class ELFT> void VersionNeedSection<ELFT>::finalize() { - this->OutSec->Link = this->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; - this->OutSec->Info = this->Info = Needed.size(); +template <class ELFT> void VersionNeedSection<ELFT>::finalizeContents() { + this->OutSec->Link = In<ELFT>::DynStrTab->OutSec->SectionIndex; + this->OutSec->Info = Needed.size(); } template <class ELFT> size_t VersionNeedSection<ELFT>::getSize() const { @@ -1840,53 +2106,175 @@ template <class ELFT> bool VersionNeedSection<ELFT>::empty() const { return getNeedNum() == 0; } -template <class ELFT> -MipsRldMapSection<ELFT>::MipsRldMapSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - sizeof(typename ELFT::uint), ".rld_map") {} +MergeSyntheticSection::MergeSyntheticSection(StringRef Name, uint32_t Type, + uint64_t Flags, uint32_t Alignment) + : SyntheticSection(Flags, Type, Alignment, Name), + Builder(StringTableBuilder::RAW, Alignment) {} + +void MergeSyntheticSection::addSection(MergeInputSection *MS) { + assert(!Finalized); + MS->MergeSec = this; + Sections.push_back(MS); +} + +void MergeSyntheticSection::writeTo(uint8_t *Buf) { Builder.write(Buf); } + +bool MergeSyntheticSection::shouldTailMerge() const { + return (this->Flags & SHF_STRINGS) && Config->Optimize >= 2; +} + +void MergeSyntheticSection::finalizeTailMerge() { + // Add all string pieces to the string table builder to create section + // contents. 
+ for (MergeInputSection *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Builder.add(Sec->getData(I)); + + // Fix the string table content. After this, the contents will never change. + Builder.finalize(); + + // finalize() fixed tail-optimized strings, so we can now get + // offsets of strings. Get an offset for each string and save it + // to a corresponding StringPiece for easy access. + for (MergeInputSection *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I)); +} + +void MergeSyntheticSection::finalizeNoTailMerge() { + // Add all string pieces to the string table builder to create section + // contents. Because we are not tail-optimizing, offsets of strings are + // fixed when they are added to the builder (string table builder contains + // a hash table from strings to offsets). + for (MergeInputSection *Sec : Sections) + for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) + if (Sec->Pieces[I].Live) + Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I)); + + Builder.finalizeInOrder(); +} + +void MergeSyntheticSection::finalizeContents() { + if (Finalized) + return; + Finalized = true; + if (shouldTailMerge()) + finalizeTailMerge(); + else + finalizeNoTailMerge(); +} + +size_t MergeSyntheticSection::getSize() const { + // We should finalize string builder to know the size. + const_cast<MergeSyntheticSection *>(this)->finalizeContents(); + return Builder.getSize(); +} + +MipsRldMapSection::MipsRldMapSection() + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Config->Wordsize, + ".rld_map") {} -template <class ELFT> void MipsRldMapSection<ELFT>::writeTo(uint8_t *Buf) { +void MipsRldMapSection::writeTo(uint8_t *Buf) { // Apply filler from linker script. 
- uint64_t Filler = Script<ELFT>::X->getFiller(this->Name); + Optional<uint32_t> Fill = Script->getFiller(this->Name); + if (!Fill || *Fill == 0) + return; + + uint64_t Filler = *Fill; Filler = (Filler << 32) | Filler; memcpy(Buf, &Filler, getSize()); } -template <class ELFT> -ARMExidxSentinelSection<ELFT>::ARMExidxSentinelSection() - : SyntheticSection<ELFT>(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, - sizeof(typename ELFT::uint), ".ARM.exidx") {} +ARMExidxSentinelSection::ARMExidxSentinelSection() + : SyntheticSection(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, + Config->Wordsize, ".ARM.exidx") {} // Write a terminating sentinel entry to the end of the .ARM.exidx table. // This section will have been sorted last in the .ARM.exidx table. // This table entry will have the form: // | PREL31 upper bound of code that has exception tables | EXIDX_CANTUNWIND | -template <class ELFT> -void ARMExidxSentinelSection<ELFT>::writeTo(uint8_t *Buf) { +void ARMExidxSentinelSection::writeTo(uint8_t *Buf) { // Get the InputSection before us, we are by definition last - auto RI = cast<OutputSection<ELFT>>(this->OutSec)->Sections.rbegin(); - InputSection<ELFT> *LE = *(++RI); - InputSection<ELFT> *LC = cast<InputSection<ELFT>>(LE->getLinkOrderDep()); + auto RI = cast<OutputSection>(this->OutSec)->Sections.rbegin(); + InputSection *LE = *(++RI); + InputSection *LC = cast<InputSection>(LE->getLinkOrderDep()); uint64_t S = LC->OutSec->Addr + LC->getOffset(LC->getSize()); uint64_t P = this->getVA(); Target->relocateOne(Buf, R_ARM_PREL31, S - P); write32le(Buf + 4, 0x1); } -template InputSection<ELF32LE> *elf::createCommonSection(); -template InputSection<ELF32BE> *elf::createCommonSection(); -template InputSection<ELF64LE> *elf::createCommonSection(); -template InputSection<ELF64BE> *elf::createCommonSection(); - -template InputSection<ELF32LE> *elf::createInterpSection(); -template InputSection<ELF32BE> *elf::createInterpSection(); -template InputSection<ELF64LE> 
*elf::createInterpSection(); -template InputSection<ELF64BE> *elf::createInterpSection(); - -template MergeInputSection<ELF32LE> *elf::createCommentSection(); -template MergeInputSection<ELF32BE> *elf::createCommentSection(); -template MergeInputSection<ELF64LE> *elf::createCommentSection(); -template MergeInputSection<ELF64BE> *elf::createCommentSection(); +ThunkSection::ThunkSection(OutputSection *OS, uint64_t Off) + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, + Config->Wordsize, ".text.thunk") { + this->OutSec = OS; + this->OutSecOff = Off; +} + +void ThunkSection::addThunk(Thunk *T) { + uint64_t Off = alignTo(Size, T->alignment); + T->Offset = Off; + Thunks.push_back(T); + T->addSymbols(*this); + Size = Off + T->size(); +} + +void ThunkSection::writeTo(uint8_t *Buf) { + for (const Thunk *T : Thunks) + T->writeTo(Buf + T->Offset, *this); +} + +InputSection *ThunkSection::getTargetInputSection() const { + const Thunk *T = Thunks.front(); + return T->getTargetInputSection(); +} + +InputSection *InX::ARMAttributes; +BssSection *InX::Bss; +BssSection *InX::BssRelRo; +BuildIdSection *InX::BuildId; +InputSection *InX::Common; +StringTableSection *InX::DynStrTab; +InputSection *InX::Interp; +GdbIndexSection *InX::GdbIndex; +GotPltSection *InX::GotPlt; +IgotPltSection *InX::IgotPlt; +MipsGotSection *InX::MipsGot; +MipsRldMapSection *InX::MipsRldMap; +PltSection *InX::Plt; +PltSection *InX::Iplt; +StringTableSection *InX::ShStrTab; +StringTableSection *InX::StrTab; + +template void PltSection::addEntry<ELF32LE>(SymbolBody &Sym); +template void PltSection::addEntry<ELF32BE>(SymbolBody &Sym); +template void PltSection::addEntry<ELF64LE>(SymbolBody &Sym); +template void PltSection::addEntry<ELF64BE>(SymbolBody &Sym); + +template InputSection *elf::createCommonSection<ELF32LE>(); +template InputSection *elf::createCommonSection<ELF32BE>(); +template InputSection *elf::createCommonSection<ELF64LE>(); +template InputSection *elf::createCommonSection<ELF64BE>(); 
+ +template MergeInputSection *elf::createCommentSection<ELF32LE>(); +template MergeInputSection *elf::createCommentSection<ELF32BE>(); +template MergeInputSection *elf::createCommentSection<ELF64LE>(); +template MergeInputSection *elf::createCommentSection<ELF64BE>(); + +template SymbolBody *elf::addSyntheticLocal<ELF32LE>(StringRef, uint8_t, + uint64_t, uint64_t, + InputSectionBase *); +template SymbolBody *elf::addSyntheticLocal<ELF32BE>(StringRef, uint8_t, + uint64_t, uint64_t, + InputSectionBase *); +template SymbolBody *elf::addSyntheticLocal<ELF64LE>(StringRef, uint8_t, + uint64_t, uint64_t, + InputSectionBase *); +template SymbolBody *elf::addSyntheticLocal<ELF64BE>(StringRef, uint8_t, + uint64_t, uint64_t, + InputSectionBase *); template class elf::MipsAbiFlagsSection<ELF32LE>; template class elf::MipsAbiFlagsSection<ELF32BE>; @@ -1903,36 +2291,11 @@ template class elf::MipsReginfoSection<ELF32BE>; template class elf::MipsReginfoSection<ELF64LE>; template class elf::MipsReginfoSection<ELF64BE>; -template class elf::BuildIdSection<ELF32LE>; -template class elf::BuildIdSection<ELF32BE>; -template class elf::BuildIdSection<ELF64LE>; -template class elf::BuildIdSection<ELF64BE>; - template class elf::GotSection<ELF32LE>; template class elf::GotSection<ELF32BE>; template class elf::GotSection<ELF64LE>; template class elf::GotSection<ELF64BE>; -template class elf::MipsGotSection<ELF32LE>; -template class elf::MipsGotSection<ELF32BE>; -template class elf::MipsGotSection<ELF64LE>; -template class elf::MipsGotSection<ELF64BE>; - -template class elf::GotPltSection<ELF32LE>; -template class elf::GotPltSection<ELF32BE>; -template class elf::GotPltSection<ELF64LE>; -template class elf::GotPltSection<ELF64BE>; - -template class elf::IgotPltSection<ELF32LE>; -template class elf::IgotPltSection<ELF32BE>; -template class elf::IgotPltSection<ELF64LE>; -template class elf::IgotPltSection<ELF64BE>; - -template class elf::StringTableSection<ELF32LE>; -template class 
elf::StringTableSection<ELF32BE>; -template class elf::StringTableSection<ELF64LE>; -template class elf::StringTableSection<ELF64BE>; - template class elf::DynamicSection<ELF32LE>; template class elf::DynamicSection<ELF32BE>; template class elf::DynamicSection<ELF64LE>; @@ -1958,21 +2321,6 @@ template class elf::HashTableSection<ELF32BE>; template class elf::HashTableSection<ELF64LE>; template class elf::HashTableSection<ELF64BE>; -template class elf::PltSection<ELF32LE>; -template class elf::PltSection<ELF32BE>; -template class elf::PltSection<ELF64LE>; -template class elf::PltSection<ELF64BE>; - -template class elf::IpltSection<ELF32LE>; -template class elf::IpltSection<ELF32BE>; -template class elf::IpltSection<ELF64LE>; -template class elf::IpltSection<ELF64BE>; - -template class elf::GdbIndexSection<ELF32LE>; -template class elf::GdbIndexSection<ELF32BE>; -template class elf::GdbIndexSection<ELF64LE>; -template class elf::GdbIndexSection<ELF64BE>; - template class elf::EhFrameHeader<ELF32LE>; template class elf::EhFrameHeader<ELF32BE>; template class elf::EhFrameHeader<ELF64LE>; @@ -1993,12 +2341,7 @@ template class elf::VersionDefinitionSection<ELF32BE>; template class elf::VersionDefinitionSection<ELF64LE>; template class elf::VersionDefinitionSection<ELF64BE>; -template class elf::MipsRldMapSection<ELF32LE>; -template class elf::MipsRldMapSection<ELF32BE>; -template class elf::MipsRldMapSection<ELF64LE>; -template class elf::MipsRldMapSection<ELF64BE>; - -template class elf::ARMExidxSentinelSection<ELF32LE>; -template class elf::ARMExidxSentinelSection<ELF32BE>; -template class elf::ARMExidxSentinelSection<ELF64LE>; -template class elf::ARMExidxSentinelSection<ELF64BE>; +template class elf::EhFrameSection<ELF32LE>; +template class elf::EhFrameSection<ELF32BE>; +template class elf::EhFrameSection<ELF64LE>; +template class elf::EhFrameSection<ELF64BE>; diff --git a/ELF/SyntheticSections.h b/ELF/SyntheticSections.h index df67e079ad0e..1098c58a3baf 100644 --- 
a/ELF/SyntheticSections.h +++ b/ELF/SyntheticSections.h @@ -6,10 +6,22 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// Synthetic sections represent chunks of linker-created data. If you +// need to create a chunk of data that to be included in some section +// in the result, you probably want to create that as a synthetic section. +// +// Synthetic sections are designed as input sections as opposed to +// output sections because we want to allow them to be manipulated +// using linker scripts just like other input sections from regular +// files. +// +//===----------------------------------------------------------------------===// #ifndef LLD_ELF_SYNTHETIC_SECTION_H #define LLD_ELF_SYNTHETIC_SECTION_H +#include "EhFrame.h" #include "GdbIndex.h" #include "InputSection.h" #include "llvm/ADT/MapVector.h" @@ -18,49 +30,95 @@ namespace lld { namespace elf { -template <class ELFT> class SyntheticSection : public InputSection<ELFT> { - typedef typename ELFT::uint uintX_t; - +class SyntheticSection : public InputSection { public: - SyntheticSection(uintX_t Flags, uint32_t Type, uintX_t Addralign, + SyntheticSection(uint64_t Flags, uint32_t Type, uint32_t Alignment, StringRef Name) - : InputSection<ELFT>(Flags, Type, Addralign, ArrayRef<uint8_t>(), Name, - InputSectionData::Synthetic) { + : InputSection(Flags, Type, Alignment, {}, Name, + InputSectionBase::Synthetic) { this->Live = true; } virtual ~SyntheticSection() = default; virtual void writeTo(uint8_t *Buf) = 0; virtual size_t getSize() const = 0; - virtual void finalize() {} + virtual void finalizeContents() {} + // If the section has the SHF_ALLOC flag and the size may be changed if + // thunks are added, update the section size. + virtual void updateAllocSize() {} + // If any additional finalization of contents are needed post thunk creation. 
+ virtual void postThunkContents() {} virtual bool empty() const { return false; } + uint64_t getVA() const; - uintX_t getVA() const { - return this->OutSec ? this->OutSec->Addr + this->OutSecOff : 0; + static bool classof(const InputSectionBase *D) { + return D->kind() == InputSectionBase::Synthetic; } +}; - static bool classof(const InputSectionData *D) { - return D->kind() == InputSectionData::Synthetic; - } +struct CieRecord { + EhSectionPiece *Piece = nullptr; + std::vector<EhSectionPiece *> FdePieces; }; -template <class ELFT> class GotSection final : public SyntheticSection<ELFT> { - typedef typename ELFT::uint uintX_t; +// Section for .eh_frame. +template <class ELFT> class EhFrameSection final : public SyntheticSection { + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::Rel Elf_Rel; + typedef typename ELFT::Rela Elf_Rela; + + void updateAlignment(uint64_t Val) { + if (Val > this->Alignment) + this->Alignment = Val; + } + +public: + EhFrameSection(); + void writeTo(uint8_t *Buf) override; + void finalizeContents() override; + bool empty() const override { return Sections.empty(); } + size_t getSize() const override { return Size; } + + void addSection(InputSectionBase *S); + + size_t NumFdes = 0; + + std::vector<EhInputSection *> Sections; + +private: + uint64_t Size = 0; + template <class RelTy> + void addSectionAux(EhInputSection *S, llvm::ArrayRef<RelTy> Rels); + + template <class RelTy> + CieRecord *addCie(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); + + template <class RelTy> + bool isFdeLive(EhSectionPiece &Piece, ArrayRef<RelTy> Rels); + + uint64_t getFdePc(uint8_t *Buf, size_t Off, uint8_t Enc); + std::vector<CieRecord *> Cies; + + // CIE records are uniquified by their contents and personality functions. 
+ llvm::DenseMap<std::pair<ArrayRef<uint8_t>, SymbolBody *>, CieRecord> CieMap; +}; + +template <class ELFT> class GotSection final : public SyntheticSection { public: GotSection(); void writeTo(uint8_t *Buf) override; size_t getSize() const override { return Size; } - void finalize() override; + void finalizeContents() override; bool empty() const override; void addEntry(SymbolBody &Sym); bool addDynTlsEntry(SymbolBody &Sym); bool addTlsIndex(); - uintX_t getGlobalDynAddr(const SymbolBody &B) const; - uintX_t getGlobalDynOffset(const SymbolBody &B) const; + uint64_t getGlobalDynAddr(const SymbolBody &B) const; + uint64_t getGlobalDynOffset(const SymbolBody &B) const; - uintX_t getTlsIndexVA() { return this->getVA() + TlsIndexOff; } + uint64_t getTlsIndexVA() { return this->getVA() + TlsIndexOff; } uint32_t getTlsIndexOff() const { return TlsIndexOff; } // Flag to force GOT to be in output if we have relocations @@ -70,11 +128,11 @@ public: private: size_t NumEntries = 0; uint32_t TlsIndexOff = -1; - uintX_t Size = 0; + uint64_t Size = 0; }; // .note.gnu.build-id section. -template <class ELFT> class BuildIdSection : public SyntheticSection<ELFT> { +class BuildIdSection : public SyntheticSection { // First 16 bytes are a header. static const unsigned HeaderSize = 16; @@ -92,22 +150,36 @@ private: uint8_t *HashBuf; }; -template <class ELFT> -class MipsGotSection final : public SyntheticSection<ELFT> { - typedef typename ELFT::uint uintX_t; +// BssSection is used to reserve space for copy relocations and common symbols. +// We create three instances of this class for .bss, .bss.rel.ro and "COMMON", +// that are used for writable symbols, read-only symbols and common symbols, +// respectively. 
+class BssSection final : public SyntheticSection { +public: + BssSection(StringRef Name); + void writeTo(uint8_t *) override {} + bool empty() const override { return getSize() == 0; } + size_t reserveSpace(uint64_t Size, uint32_t Alignment); + size_t getSize() const override { return Size; } +private: + uint64_t Size = 0; +}; + +class MipsGotSection final : public SyntheticSection { public: MipsGotSection(); void writeTo(uint8_t *Buf) override; size_t getSize() const override { return Size; } - void finalize() override; + void updateAllocSize() override; + void finalizeContents() override; bool empty() const override; - void addEntry(SymbolBody &Sym, uintX_t Addend, RelExpr Expr); + void addEntry(SymbolBody &Sym, int64_t Addend, RelExpr Expr); bool addDynTlsEntry(SymbolBody &Sym); bool addTlsIndex(); - uintX_t getPageEntryOffset(const SymbolBody &B, uintX_t Addend) const; - uintX_t getBodyEntryOffset(const SymbolBody &B, uintX_t Addend) const; - uintX_t getGlobalDynOffset(const SymbolBody &B) const; + uint64_t getPageEntryOffset(const SymbolBody &B, int64_t Addend) const; + uint64_t getBodyEntryOffset(const SymbolBody &B, int64_t Addend) const; + uint64_t getGlobalDynOffset(const SymbolBody &B) const; // Returns the symbol which corresponds to the first entry of the global part // of GOT on MIPS platform. It is required to fill up MIPS-specific dynamic @@ -121,11 +193,11 @@ public: // Returns offset of TLS part of the MIPS GOT table. This part goes // after 'local' and 'global' entries. - uintX_t getTlsOffset() const; + uint64_t getTlsOffset() const; uint32_t getTlsIndexOff() const { return TlsIndexOff; } - uintX_t getGp() const; + uint64_t getGp() const; private: // MIPS GOT consists of three parts: local, global and tls. Each part @@ -171,9 +243,9 @@ private: uint32_t PageEntriesNum = 0; // Map output sections referenced by MIPS GOT relocations // to the first index of "Page" entries allocated for this section. 
- llvm::SmallMapVector<const OutputSectionBase *, size_t, 16> PageIndexMap; + llvm::SmallMapVector<const OutputSection *, size_t, 16> PageIndexMap; - typedef std::pair<const SymbolBody *, uintX_t> GotEntry; + typedef std::pair<const SymbolBody *, uint64_t> GotEntry; typedef std::vector<GotEntry> GotEntries; // Map from Symbol-Addend pair to the GOT index. llvm::DenseMap<GotEntry, size_t> EntryIndexMap; @@ -189,13 +261,10 @@ private: std::vector<const SymbolBody *> TlsEntries; uint32_t TlsIndexOff = -1; - uintX_t Size = 0; + uint64_t Size = 0; }; -template <class ELFT> -class GotPltSection final : public SyntheticSection<ELFT> { - typedef typename ELFT::uint uintX_t; - +class GotPltSection final : public SyntheticSection { public: GotPltSection(); void addEntry(SymbolBody &Sym); @@ -207,14 +276,11 @@ private: std::vector<const SymbolBody *> Entries; }; -// The IgotPltSection is a Got associated with the IpltSection for GNU Ifunc +// The IgotPltSection is a Got associated with the PltSection for GNU Ifunc // Symbols that will be relocated by Target->IRelativeRel. // On most Targets the IgotPltSection will immediately follow the GotPltSection // on ARM the IgotPltSection will immediately follow the GotSection. -template <class ELFT> -class IgotPltSection final : public SyntheticSection<ELFT> { - typedef typename ELFT::uint uintX_t; - +class IgotPltSection final : public SyntheticSection { public: IgotPltSection(); void addEntry(SymbolBody &Sym); @@ -226,10 +292,8 @@ private: std::vector<const SymbolBody *> Entries; }; -template <class ELFT> -class StringTableSection final : public SyntheticSection<ELFT> { +class StringTableSection final : public SyntheticSection { public: - typedef typename ELFT::uint uintX_t; StringTableSection(StringRef Name, bool Dynamic); unsigned addString(StringRef S, bool HashIt = true); void writeTo(uint8_t *Buf) override; @@ -239,54 +303,41 @@ public: private: const bool Dynamic; - // ELF string tables start with a NUL byte, so 1. 
- uintX_t Size = 1; + uint64_t Size = 0; llvm::DenseMap<StringRef, unsigned> StringMap; std::vector<StringRef> Strings; }; -template <class ELFT> class DynamicReloc { - typedef typename ELFT::uint uintX_t; - +class DynamicReloc { public: - DynamicReloc(uint32_t Type, const InputSectionBase<ELFT> *InputSec, - uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, - uintX_t Addend) + DynamicReloc(uint32_t Type, const InputSectionBase *InputSec, + uint64_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, + int64_t Addend) : Type(Type), Sym(Sym), InputSec(InputSec), OffsetInSec(OffsetInSec), UseSymVA(UseSymVA), Addend(Addend) {} - DynamicReloc(uint32_t Type, const OutputSectionBase *OutputSec, - uintX_t OffsetInSec, bool UseSymVA, SymbolBody *Sym, - uintX_t Addend) - : Type(Type), Sym(Sym), OutputSec(OutputSec), OffsetInSec(OffsetInSec), - UseSymVA(UseSymVA), Addend(Addend) {} - - uintX_t getOffset() const; - uintX_t getAddend() const; + uint64_t getOffset() const; + int64_t getAddend() const; uint32_t getSymIndex() const; - const OutputSectionBase *getOutputSec() const { return OutputSec; } - const InputSectionBase<ELFT> *getInputSec() const { return InputSec; } + const InputSectionBase *getInputSec() const { return InputSec; } uint32_t Type; private: SymbolBody *Sym; - const InputSectionBase<ELFT> *InputSec = nullptr; - const OutputSectionBase *OutputSec = nullptr; - uintX_t OffsetInSec; + const InputSectionBase *InputSec = nullptr; + uint64_t OffsetInSec; bool UseSymVA; - uintX_t Addend; + int64_t Addend; }; -template <class ELFT> -class DynamicSection final : public SyntheticSection<ELFT> { +template <class ELFT> class DynamicSection final : public SyntheticSection { typedef typename ELFT::Dyn Elf_Dyn; typedef typename ELFT::Rel Elf_Rel; typedef typename ELFT::Rela Elf_Rela; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::uint uintX_t; // The .dynamic section contains information for the dynamic linker. 
// The section consists of fixed size entries, which consist of @@ -295,49 +346,45 @@ class DynamicSection final : public SyntheticSection<ELFT> { struct Entry { int32_t Tag; union { - OutputSectionBase *OutSec; - InputSection<ELFT> *InSec; + OutputSection *OutSec; + InputSection *InSec; uint64_t Val; const SymbolBody *Sym; }; enum KindT { SecAddr, SecSize, SymAddr, PlainInt, InSecAddr } Kind; - Entry(int32_t Tag, OutputSectionBase *OutSec, KindT Kind = SecAddr) + Entry(int32_t Tag, OutputSection *OutSec, KindT Kind = SecAddr) : Tag(Tag), OutSec(OutSec), Kind(Kind) {} - Entry(int32_t Tag, InputSection<ELFT> *Sec) + Entry(int32_t Tag, InputSection *Sec) : Tag(Tag), InSec(Sec), Kind(InSecAddr) {} Entry(int32_t Tag, uint64_t Val) : Tag(Tag), Val(Val), Kind(PlainInt) {} Entry(int32_t Tag, const SymbolBody *Sym) : Tag(Tag), Sym(Sym), Kind(SymAddr) {} }; - // finalize() fills this vector with the section contents. finalize() - // cannot directly create final section contents because when the - // function is called, symbol or section addresses are not fixed yet. + // finalizeContents() fills this vector with the section contents. 
std::vector<Entry> Entries; public: DynamicSection(); - void finalize() override; + void finalizeContents() override; void writeTo(uint8_t *Buf) override; size_t getSize() const override { return Size; } private: void addEntries(); void add(Entry E) { Entries.push_back(E); } - uintX_t Size = 0; + uint64_t Size = 0; }; -template <class ELFT> -class RelocationSection final : public SyntheticSection<ELFT> { +template <class ELFT> class RelocationSection final : public SyntheticSection { typedef typename ELFT::Rel Elf_Rel; typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::uint uintX_t; public: RelocationSection(StringRef Name, bool Sort); - void addReloc(const DynamicReloc<ELFT> &Reloc); + void addReloc(const DynamicReloc &Reloc); unsigned getRelocOffset(); - void finalize() override; + void finalizeContents() override; void writeTo(uint8_t *Buf) override; bool empty() const override { return Relocs.empty(); } size_t getSize() const override { return Relocs.size() * this->Entsize; } @@ -346,7 +393,7 @@ public: private: bool Sort; size_t NumRelativeRelocs = 0; - std::vector<DynamicReloc<ELFT>> Relocs; + std::vector<DynamicReloc> Relocs; }; struct SymbolTableEntry { @@ -354,125 +401,95 @@ struct SymbolTableEntry { size_t StrTabOffset; }; -template <class ELFT> -class SymbolTableSection final : public SyntheticSection<ELFT> { +template <class ELFT> class SymbolTableSection final : public SyntheticSection { public: - typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::SymRange Elf_Sym_Range; - typedef typename ELFT::uint uintX_t; - SymbolTableSection(StringTableSection<ELFT> &StrTabSec); - void finalize() override; + SymbolTableSection(StringTableSection &StrTabSec); + + void finalizeContents() override; + void postThunkContents() override; void writeTo(uint8_t *Buf) override; size_t getSize() const override { return getNumSymbols() * sizeof(Elf_Sym); } - void addGlobal(SymbolBody *Body); - void 
addLocal(SymbolBody *Body); - StringTableSection<ELFT> &getStrTabSec() const { return StrTabSec; } + void addSymbol(SymbolBody *Body); unsigned getNumSymbols() const { return Symbols.size() + 1; } size_t getSymbolIndex(SymbolBody *Body); - ArrayRef<SymbolTableEntry> getSymbols() const { return Symbols; } - static const OutputSectionBase *getOutputSection(SymbolBody *Sym); - private: - void writeLocalSymbols(uint8_t *&Buf); - void writeGlobalSymbols(uint8_t *Buf); - // A vector of symbols and their string table offsets. std::vector<SymbolTableEntry> Symbols; - StringTableSection<ELFT> &StrTabSec; - - unsigned NumLocals = 0; + StringTableSection &StrTabSec; }; // Outputs GNU Hash section. For detailed explanation see: // https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections template <class ELFT> -class GnuHashTableSection final : public SyntheticSection<ELFT> { - typedef typename ELFT::Off Elf_Off; - typedef typename ELFT::Word Elf_Word; - typedef typename ELFT::uint uintX_t; - +class GnuHashTableSection final : public SyntheticSection { public: GnuHashTableSection(); - void finalize() override; + void finalizeContents() override; void writeTo(uint8_t *Buf) override; - size_t getSize() const override { return this->Size; } + size_t getSize() const override { return Size; } // Adds symbols to the hash table. // Sorts the input to satisfy GNU hash section requirements. void addSymbols(std::vector<SymbolTableEntry> &Symbols); private: - static unsigned calcNBuckets(unsigned NumHashed); - static unsigned calcMaskWords(unsigned NumHashed); + size_t getShift2() const { return Config->Is64 ? 
6 : 5; } - void writeHeader(uint8_t *&Buf); - void writeBloomFilter(uint8_t *&Buf); + void writeBloomFilter(uint8_t *Buf); void writeHashTable(uint8_t *Buf); - struct SymbolData { + struct Entry { SymbolBody *Body; - size_t STName; + size_t StrTabOffset; uint32_t Hash; }; - std::vector<SymbolData> Symbols; - - unsigned MaskWords; - unsigned NBuckets; - unsigned Shift2; - uintX_t Size = 0; + std::vector<Entry> Symbols; + size_t MaskWords; + size_t NBuckets = 0; + size_t Size = 0; }; -template <class ELFT> -class HashTableSection final : public SyntheticSection<ELFT> { - typedef typename ELFT::Word Elf_Word; - +template <class ELFT> class HashTableSection final : public SyntheticSection { public: HashTableSection(); - void finalize() override; + void finalizeContents() override; void writeTo(uint8_t *Buf) override; - size_t getSize() const override { return this->Size; } + size_t getSize() const override { return Size; } private: size_t Size = 0; }; -template <class ELFT> class PltSection final : public SyntheticSection<ELFT> { +// The PltSection is used for both the Plt and Iplt. The former always has a +// header as its first entry that is used at run-time to resolve lazy binding. +// The latter is used for GNU Ifunc symbols, that will be subject to a +// Target->IRelativeRel. 
+class PltSection : public SyntheticSection { public: - PltSection(); + PltSection(size_t HeaderSize); void writeTo(uint8_t *Buf) override; size_t getSize() const override; - void addEntry(SymbolBody &Sym); bool empty() const override { return Entries.empty(); } + void addSymbols(); -private: - std::vector<std::pair<const SymbolBody *, unsigned>> Entries; -}; - -// The IpltSection is a variant of Plt for recording entries for GNU Ifunc -// symbols that will be subject to a Target->IRelativeRel -// The IpltSection immediately follows the Plt section in the Output Section -template <class ELFT> class IpltSection final : public SyntheticSection<ELFT> { -public: - IpltSection(); - void writeTo(uint8_t *Buf) override; - size_t getSize() const override; - void addEntry(SymbolBody &Sym); - bool empty() const override { return Entries.empty(); } + template <class ELFT> void addEntry(SymbolBody &Sym); private: + void writeHeader(uint8_t *Buf){}; + void addHeaderSymbols(){}; + unsigned getPltRelocOff() const; std::vector<std::pair<const SymbolBody *, unsigned>> Entries; + // Iplt always has HeaderSize of 0, the Plt HeaderSize is always non-zero + size_t HeaderSize; }; -template <class ELFT> -class GdbIndexSection final : public SyntheticSection<ELFT> { - typedef typename ELFT::uint uintX_t; - +class GdbIndexSection final : public SyntheticSection { const unsigned OffsetTypeSize = 4; const unsigned CuListOffset = 6 * OffsetTypeSize; const unsigned CompilationUnitSize = 16; @@ -481,13 +498,13 @@ class GdbIndexSection final : public SyntheticSection<ELFT> { public: GdbIndexSection(); - void finalize() override; + void finalizeContents() override; void writeTo(uint8_t *Buf) override; size_t getSize() const override; bool empty() const override; // Pairs of [CU Offset, CU length]. 
- std::vector<std::pair<uintX_t, uintX_t>> CompilationUnits; + std::vector<std::pair<uint64_t, uint64_t>> CompilationUnits; llvm::StringTableBuilder StringPool; @@ -496,11 +513,10 @@ public: // The CU vector portion of the constant pool. std::vector<std::vector<std::pair<uint32_t, uint8_t>>> CuVectors; - std::vector<AddressEntry<ELFT>> AddressArea; + std::vector<AddressEntry> AddressArea; private: - void parseDebugSections(); - void readDwarf(InputSection<ELFT> *I); + void readDwarf(InputSection *Sec); uint32_t CuTypesOffset; uint32_t SymTabOffset; @@ -522,10 +538,7 @@ private: // Detailed info about internals can be found in Ian Lance Taylor's blog: // http://www.airs.com/blog/archives/460 (".eh_frame") // http://www.airs.com/blog/archives/462 (".eh_frame_hdr") -template <class ELFT> -class EhFrameHeader final : public SyntheticSection<ELFT> { - typedef typename ELFT::uint uintX_t; - +template <class ELFT> class EhFrameHeader final : public SyntheticSection { public: EhFrameHeader(); void writeTo(uint8_t *Buf) override; @@ -551,13 +564,13 @@ private: // The section shall contain an array of Elf_Verdef structures, optionally // followed by an array of Elf_Verdaux structures. template <class ELFT> -class VersionDefinitionSection final : public SyntheticSection<ELFT> { +class VersionDefinitionSection final : public SyntheticSection { typedef typename ELFT::Verdef Elf_Verdef; typedef typename ELFT::Verdaux Elf_Verdaux; public: VersionDefinitionSection(); - void finalize() override; + void finalizeContents() override; size_t getSize() const override; void writeTo(uint8_t *Buf) override; @@ -574,12 +587,12 @@ private: // The values 0 and 1 are reserved. All other values are used for versions in // the own object or in any of the dependencies. 
template <class ELFT> -class VersionTableSection final : public SyntheticSection<ELFT> { +class VersionTableSection final : public SyntheticSection { typedef typename ELFT::Versym Elf_Versym; public: VersionTableSection(); - void finalize() override; + void finalizeContents() override; size_t getSize() const override; void writeTo(uint8_t *Buf) override; bool empty() const override; @@ -590,8 +603,7 @@ public: // Elf_Verneed specifies the version requirements for a single DSO, and contains // a reference to a linked list of Elf_Vernaux data structures which define the // mapping from version identifiers to version names. -template <class ELFT> -class VersionNeedSection final : public SyntheticSection<ELFT> { +template <class ELFT> class VersionNeedSection final : public SyntheticSection { typedef typename ELFT::Verneed Elf_Verneed; typedef typename ELFT::Vernaux Elf_Vernaux; @@ -604,17 +616,40 @@ class VersionNeedSection final : public SyntheticSection<ELFT> { public: VersionNeedSection(); - void addSymbol(SharedSymbol<ELFT> *SS); - void finalize() override; + void addSymbol(SharedSymbol *SS); + void finalizeContents() override; void writeTo(uint8_t *Buf) override; size_t getSize() const override; size_t getNeedNum() const { return Needed.size(); } bool empty() const override; }; +// MergeSyntheticSection is a class that allows us to put mergeable sections +// with different attributes in a single output sections. To do that +// we put them into MergeSyntheticSection synthetic input sections which are +// attached to regular output sections. 
+class MergeSyntheticSection final : public SyntheticSection { +public: + MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags, + uint32_t Alignment); + void addSection(MergeInputSection *MS); + void writeTo(uint8_t *Buf) override; + void finalizeContents() override; + bool shouldTailMerge() const; + size_t getSize() const override; + +private: + void finalizeTailMerge(); + void finalizeNoTailMerge(); + + bool Finalized = false; + llvm::StringTableBuilder Builder; + std::vector<MergeInputSection *> Sections; +}; + // .MIPS.abiflags section. template <class ELFT> -class MipsAbiFlagsSection final : public SyntheticSection<ELFT> { +class MipsAbiFlagsSection final : public SyntheticSection { typedef llvm::object::Elf_Mips_ABIFlags<ELFT> Elf_Mips_ABIFlags; public: @@ -629,8 +664,7 @@ private: }; // .MIPS.options section. -template <class ELFT> -class MipsOptionsSection final : public SyntheticSection<ELFT> { +template <class ELFT> class MipsOptionsSection final : public SyntheticSection { typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options; typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo; @@ -649,8 +683,7 @@ private: }; // MIPS .reginfo section. -template <class ELFT> -class MipsReginfoSection final : public SyntheticSection<ELFT> { +template <class ELFT> class MipsReginfoSection final : public SyntheticSection { typedef llvm::object::Elf_Mips_RegInfo<ELFT> Elf_Mips_RegInfo; public: @@ -668,78 +701,95 @@ private: // of executable file which is pointed to by the DT_MIPS_RLD_MAP entry. 
// See "Dynamic section" in Chapter 5 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf -template <class ELFT> class MipsRldMapSection : public SyntheticSection<ELFT> { +class MipsRldMapSection : public SyntheticSection { public: MipsRldMapSection(); - size_t getSize() const override { return sizeof(typename ELFT::uint); } + size_t getSize() const override { return Config->Wordsize; } void writeTo(uint8_t *Buf) override; }; -template <class ELFT> class ARMExidxSentinelSection : public SyntheticSection<ELFT> { +class ARMExidxSentinelSection : public SyntheticSection { public: ARMExidxSentinelSection(); size_t getSize() const override { return 8; } void writeTo(uint8_t *Buf) override; }; -template <class ELFT> InputSection<ELFT> *createCommonSection(); -template <class ELFT> InputSection<ELFT> *createInterpSection(); -template <class ELFT> MergeInputSection<ELFT> *createCommentSection(); +// A container for one or more linker generated thunks. Instances of these +// thunks including ARM interworking and Mips LA25 PI to non-PI thunks. 
+class ThunkSection : public SyntheticSection { +public: + // ThunkSection in OS, with desired OutSecOff of Off + ThunkSection(OutputSection *OS, uint64_t Off); + + // Add a newly created Thunk to this container: + // Thunk is given offset from start of this InputSection + // Thunk defines a symbol in this InputSection that can be used as target + // of a relocation + void addThunk(Thunk *T); + size_t getSize() const override { return Size; } + void writeTo(uint8_t *Buf) override; + InputSection *getTargetInputSection() const; + +private: + std::vector<const Thunk *> Thunks; + size_t Size = 0; +}; + +template <class ELFT> InputSection *createCommonSection(); +InputSection *createInterpSection(); +template <class ELFT> MergeInputSection *createCommentSection(); +template <class ELFT> +SymbolBody *addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value, + uint64_t Size, InputSectionBase *Section); // Linker generated sections which can be used as inputs. -template <class ELFT> struct In { - static InputSection<ELFT> *ARMAttributes; - static BuildIdSection<ELFT> *BuildId; - static InputSection<ELFT> *Common; +struct InX { + static InputSection *ARMAttributes; + static BssSection *Bss; + static BssSection *BssRelRo; + static BuildIdSection *BuildId; + static InputSection *Common; + static StringTableSection *DynStrTab; + static InputSection *Interp; + static GdbIndexSection *GdbIndex; + static GotPltSection *GotPlt; + static IgotPltSection *IgotPlt; + static MipsGotSection *MipsGot; + static MipsRldMapSection *MipsRldMap; + static PltSection *Plt; + static PltSection *Iplt; + static StringTableSection *ShStrTab; + static StringTableSection *StrTab; +}; + +template <class ELFT> struct In : public InX { static DynamicSection<ELFT> *Dynamic; - static StringTableSection<ELFT> *DynStrTab; static SymbolTableSection<ELFT> *DynSymTab; static EhFrameHeader<ELFT> *EhFrameHdr; static GnuHashTableSection<ELFT> *GnuHashTab; - static GdbIndexSection<ELFT> *GdbIndex; static 
GotSection<ELFT> *Got; - static MipsGotSection<ELFT> *MipsGot; - static GotPltSection<ELFT> *GotPlt; - static IgotPltSection<ELFT> *IgotPlt; + static EhFrameSection<ELFT> *EhFrame; static HashTableSection<ELFT> *HashTab; - static InputSection<ELFT> *Interp; - static MipsRldMapSection<ELFT> *MipsRldMap; - static PltSection<ELFT> *Plt; - static IpltSection<ELFT> *Iplt; static RelocationSection<ELFT> *RelaDyn; static RelocationSection<ELFT> *RelaPlt; static RelocationSection<ELFT> *RelaIplt; - static StringTableSection<ELFT> *ShStrTab; - static StringTableSection<ELFT> *StrTab; static SymbolTableSection<ELFT> *SymTab; static VersionDefinitionSection<ELFT> *VerDef; static VersionTableSection<ELFT> *VerSym; static VersionNeedSection<ELFT> *VerNeed; }; -template <class ELFT> InputSection<ELFT> *In<ELFT>::ARMAttributes; -template <class ELFT> BuildIdSection<ELFT> *In<ELFT>::BuildId; -template <class ELFT> InputSection<ELFT> *In<ELFT>::Common; template <class ELFT> DynamicSection<ELFT> *In<ELFT>::Dynamic; -template <class ELFT> StringTableSection<ELFT> *In<ELFT>::DynStrTab; template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::DynSymTab; template <class ELFT> EhFrameHeader<ELFT> *In<ELFT>::EhFrameHdr; -template <class ELFT> GdbIndexSection<ELFT> *In<ELFT>::GdbIndex; template <class ELFT> GnuHashTableSection<ELFT> *In<ELFT>::GnuHashTab; template <class ELFT> GotSection<ELFT> *In<ELFT>::Got; -template <class ELFT> MipsGotSection<ELFT> *In<ELFT>::MipsGot; -template <class ELFT> GotPltSection<ELFT> *In<ELFT>::GotPlt; -template <class ELFT> IgotPltSection<ELFT> *In<ELFT>::IgotPlt; +template <class ELFT> EhFrameSection<ELFT> *In<ELFT>::EhFrame; template <class ELFT> HashTableSection<ELFT> *In<ELFT>::HashTab; -template <class ELFT> InputSection<ELFT> *In<ELFT>::Interp; -template <class ELFT> MipsRldMapSection<ELFT> *In<ELFT>::MipsRldMap; -template <class ELFT> PltSection<ELFT> *In<ELFT>::Plt; -template <class ELFT> IpltSection<ELFT> *In<ELFT>::Iplt; template <class ELFT> 
RelocationSection<ELFT> *In<ELFT>::RelaDyn; template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaPlt; template <class ELFT> RelocationSection<ELFT> *In<ELFT>::RelaIplt; -template <class ELFT> StringTableSection<ELFT> *In<ELFT>::ShStrTab; -template <class ELFT> StringTableSection<ELFT> *In<ELFT>::StrTab; template <class ELFT> SymbolTableSection<ELFT> *In<ELFT>::SymTab; template <class ELFT> VersionDefinitionSection<ELFT> *In<ELFT>::VerDef; template <class ELFT> VersionTableSection<ELFT> *In<ELFT>::VerSym; diff --git a/ELF/Target.cpp b/ELF/Target.cpp index 55fcf1734d1f..664dcd1ed44e 100644 --- a/ELF/Target.cpp +++ b/ELF/Target.cpp @@ -45,7 +45,10 @@ using namespace llvm::support::endian; using namespace llvm::ELF; std::string lld::toString(uint32_t Type) { - return getELFRelocationTypeName(elf::Config->EMachine, Type); + StringRef S = getELFRelocationTypeName(elf::Config->EMachine, Type); + if (S == "Unknown") + return ("Unknown (" + Twine(Type) + ")").str(); + return S; } namespace lld { @@ -56,20 +59,20 @@ TargetInfo *Target; static void or32le(uint8_t *P, int32_t V) { write32le(P, read32le(P) | V); } static void or32be(uint8_t *P, int32_t V) { write32be(P, read32be(P) | V); } -template <class ELFT> static std::string getErrorLoc(uint8_t *Loc) { - for (InputSectionData *D : Symtab<ELFT>::X->Sections) { - auto *IS = dyn_cast_or_null<InputSection<ELFT>>(D); +template <class ELFT> static std::string getErrorLoc(const uint8_t *Loc) { + for (InputSectionBase *D : InputSections) { + auto *IS = dyn_cast_or_null<InputSection>(D); if (!IS || !IS->OutSec) continue; - uint8_t *ISLoc = cast<OutputSection<ELFT>>(IS->OutSec)->Loc + IS->OutSecOff; + uint8_t *ISLoc = cast<OutputSection>(IS->OutSec)->Loc + IS->OutSecOff; if (ISLoc <= Loc && Loc < ISLoc + IS->getSize()) - return IS->getLocation(Loc - ISLoc) + ": "; + return IS->template getLocation<ELFT>(Loc - ISLoc) + ": "; } return ""; } -static std::string getErrorLocation(uint8_t *Loc) { +static std::string 
getErrorLocation(const uint8_t *Loc) { switch (Config->EKind) { case ELF32LEKind: return getErrorLoc<ELF32LE>(Loc); @@ -116,17 +119,17 @@ namespace { class X86TargetInfo final : public TargetInfo { public: X86TargetInfo(); - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; - uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; + int64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; void writeGotPltHeader(uint8_t *Buf) const override; uint32_t getDynRel(uint32_t Type) const override; bool isTlsLocalDynamicRel(uint32_t Type) const override; - bool isTlsGlobalDynamicRel(uint32_t Type) const override; bool isTlsInitialExecRel(uint32_t Type) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; - void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; @@ -141,15 +144,15 @@ public: template <class ELFT> class X86_64TargetInfo final : public TargetInfo { public: X86_64TargetInfo(); - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; bool isPicRel(uint32_t Type) const override; bool isTlsLocalDynamicRel(uint32_t Type) const override; - bool isTlsGlobalDynamicRel(uint32_t Type) const override; bool isTlsInitialExecRel(uint32_t Type) const override; void writeGotPltHeader(uint8_t *Buf) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; - void 
writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; @@ -170,14 +173,16 @@ class PPCTargetInfo final : public TargetInfo { public: PPCTargetInfo(); void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; }; class PPC64TargetInfo final : public TargetInfo { public: PPC64TargetInfo(); - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; - void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; }; @@ -185,12 +190,13 @@ public: class AArch64TargetInfo final : public TargetInfo { public: AArch64TargetInfo(); - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; bool isPicRel(uint32_t Type) const override; bool isTlsInitialExecRel(uint32_t Type) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; - void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; bool usesOnlyLowPageBits(uint32_t Type) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; @@ -205,44 +211,47 @@ 
class AMDGPUTargetInfo final : public TargetInfo { public: AMDGPUTargetInfo(); void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; }; class ARMTargetInfo final : public TargetInfo { public: ARMTargetInfo(); - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; bool isPicRel(uint32_t Type) const override; uint32_t getDynRel(uint32_t Type) const override; - uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + int64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; bool isTlsLocalDynamicRel(uint32_t Type) const override; - bool isTlsGlobalDynamicRel(uint32_t Type) const override; bool isTlsInitialExecRel(uint32_t Type) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; - void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; - RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, - const SymbolBody &S) const override; + void addPltSymbols(InputSectionBase *IS, uint64_t Off) const override; + void addPltHeaderSymbols(InputSectionBase *ISD) const override; + bool needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, + const SymbolBody &S) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; }; template <class ELFT> class MipsTargetInfo final : public TargetInfo { public: MipsTargetInfo(); - RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const 
override; - uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; + RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const override; + int64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const override; bool isPicRel(uint32_t Type) const override; uint32_t getDynRel(uint32_t Type) const override; bool isTlsLocalDynamicRel(uint32_t Type) const override; - bool isTlsGlobalDynamicRel(uint32_t Type) const override; void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const override; void writePltHeader(uint8_t *Buf) const override; - void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, + void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const override; - RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, - const SymbolBody &S) const override; + bool needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, + const SymbolBody &S) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; bool usesOnlyLowPageBits(uint32_t Type) const override; }; @@ -286,25 +295,21 @@ TargetInfo *createTarget() { TargetInfo::~TargetInfo() {} -uint64_t TargetInfo::getImplicitAddend(const uint8_t *Buf, - uint32_t Type) const { +int64_t TargetInfo::getImplicitAddend(const uint8_t *Buf, uint32_t Type) const { return 0; } bool TargetInfo::usesOnlyLowPageBits(uint32_t Type) const { return false; } -RelExpr TargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, - const InputFile &File, - const SymbolBody &S) const { - return Expr; +bool TargetInfo::needsThunk(RelExpr Expr, uint32_t RelocType, + const InputFile *File, const SymbolBody &S) const { + return false; } bool TargetInfo::isTlsInitialExecRel(uint32_t Type) const { return false; } bool TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { return false; } -bool TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { return 
false; } - void TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { writeGotPlt(Buf, S); } @@ -352,10 +357,14 @@ X86TargetInfo::X86TargetInfo() { PltEntrySize = 16; PltHeaderSize = 16; TlsGdRelaxSkip = 2; + // 0xCC is the "int3" (call debug exception handler) instruction. + TrapInstr = 0xcccccccc; } -RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { +RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { switch (Type) { + case R_386_8: case R_386_16: case R_386_32: case R_386_TLS_LDO_32: @@ -366,6 +375,7 @@ RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { return R_TLSLD; case R_386_PLT32: return R_PLT_PC; + case R_386_PC8: case R_386_PC16: case R_386_PC32: return R_PC; @@ -375,6 +385,24 @@ RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { return R_GOT; case R_386_GOT32: case R_386_GOT32X: + // These relocations can be calculated in two different ways. + // Usual calculation is G + A - GOT what means an offset in GOT table + // (R_GOT_FROM_END). When instruction pointed by relocation has no base + // register, then relocations can be used when PIC code is disabled. In that + // case calculation is G + A, it resolves to an address of entry in GOT + // (R_GOT) and not an offset. + // + // To check that instruction has no base register we scan ModR/M byte. + // See "Table 2-2. 
32-Bit Addressing Forms with the ModR/M Byte" + // (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/ + // 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf) + if ((Loc[-1] & 0xc7) != 0x5) + return R_GOT_FROM_END; + if (Config->Pic) + error(toString(S.File) + ": relocation " + toString(Type) + " against '" + + S.getName() + + "' without base register can not be used when PIC enabled"); + return R_GOT; case R_386_TLS_GOTIE: return R_GOT_FROM_END; case R_386_GOTOFF: @@ -384,10 +412,9 @@ RelExpr X86TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { case R_386_TLS_LE_32: return R_NEG_TLS; case R_386_NONE: - return R_HINT; + return R_NONE; default: - error("do not know how to handle relocation '" + toString(Type) + "' (" + - Twine(Type) + ")"); + error(toString(S.File) + ": unknown relocation type: " + toString(Type)); return R_HINT; } } @@ -411,12 +438,12 @@ void X86TargetInfo::writeGotPltHeader(uint8_t *Buf) const { void X86TargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const { // Entries in .got.plt initially points back to the corresponding // PLT entries with a fixed offset to skip the first instruction. - write32le(Buf, S.getPltVA<ELF32LE>() + 6); + write32le(Buf, S.getPltVA() + 6); } void X86TargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { // An x86 entry is the address of the ifunc resolver function. 
- write32le(Buf, S.getVA<ELF32LE>()); + write32le(Buf, S.getVA()); } uint32_t X86TargetInfo::getDynRel(uint32_t Type) const { @@ -427,10 +454,6 @@ uint32_t X86TargetInfo::getDynRel(uint32_t Type) const { return Type; } -bool X86TargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { - return Type == R_386_TLS_GD; -} - bool X86TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { return Type == R_386_TLS_LDO_32 || Type == R_386_TLS_LDM; } @@ -440,30 +463,33 @@ bool X86TargetInfo::isTlsInitialExecRel(uint32_t Type) const { } void X86TargetInfo::writePltHeader(uint8_t *Buf) const { - // Executable files and shared object files have - // separate procedure linkage tables. if (Config->Pic) { const uint8_t V[] = { - 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) - 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) - 0x90, 0x90, 0x90, 0x90 // nop; nop; nop; nop + 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl GOTPLT+4(%ebx) + 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *GOTPLT+8(%ebx) + 0x90, 0x90, 0x90, 0x90 // nop }; memcpy(Buf, V, sizeof(V)); + + uint32_t Ebx = In<ELF32LE>::Got->getVA() + In<ELF32LE>::Got->getSize(); + uint32_t GotPlt = In<ELF32LE>::GotPlt->getVA() - Ebx; + write32le(Buf + 2, GotPlt + 4); + write32le(Buf + 8, GotPlt + 8); return; } const uint8_t PltData[] = { - 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushl (GOT+4) - 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *(GOT+8) - 0x90, 0x90, 0x90, 0x90 // nop; nop; nop; nop + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushl (GOTPLT+4) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *(GOTPLT+8) + 0x90, 0x90, 0x90, 0x90 // nop }; memcpy(Buf, PltData, sizeof(PltData)); - uint32_t Got = In<ELF32LE>::GotPlt->getVA(); - write32le(Buf + 2, Got + 4); - write32le(Buf + 8, Got + 8); + uint32_t GotPlt = In<ELF32LE>::GotPlt->getVA(); + write32le(Buf + 2, GotPlt + 4); + write32le(Buf + 8, GotPlt + 8); } -void X86TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, +void X86TargetInfo::writePlt(uint8_t *Buf, uint64_t 
GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Inst[] = { @@ -473,22 +499,32 @@ void X86TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, }; memcpy(Buf, Inst, sizeof(Inst)); - // jmp *foo@GOT(%ebx) or jmp *foo_in_GOT - Buf[1] = Config->Pic ? 0xa3 : 0x25; - uint32_t Got = In<ELF32LE>::GotPlt->getVA(); - write32le(Buf + 2, Config->Shared ? GotEntryAddr - Got : GotEntryAddr); + if (Config->Pic) { + // jmp *foo@GOT(%ebx) + uint32_t Ebx = In<ELF32LE>::Got->getVA() + In<ELF32LE>::Got->getSize(); + Buf[1] = 0xa3; + write32le(Buf + 2, GotPltEntryAddr - Ebx); + } else { + // jmp *foo_in_GOT + Buf[1] = 0x25; + write32le(Buf + 2, GotPltEntryAddr); + } + write32le(Buf + 7, RelOff); write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); } -uint64_t X86TargetInfo::getImplicitAddend(const uint8_t *Buf, - uint32_t Type) const { +int64_t X86TargetInfo::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { switch (Type) { default: return 0; + case R_386_8: + case R_386_PC8: + return SignExtend64<8>(*Buf); case R_386_16: case R_386_PC16: - return read16le(Buf); + return SignExtend64<16>(read16le(Buf)); case R_386_32: case R_386_GOT32: case R_386_GOT32X: @@ -497,21 +533,36 @@ uint64_t X86TargetInfo::getImplicitAddend(const uint8_t *Buf, case R_386_PC32: case R_386_PLT32: case R_386_TLS_LE: - return read32le(Buf); + return SignExtend64<32>(read32le(Buf)); } } void X86TargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { - checkInt<32>(Loc, Val, Type); - - // R_386_PC16 and R_386_16 are not part of the current i386 psABI. They are - // used by 16-bit x86 objects, like boot loaders. - if (Type == R_386_16 || Type == R_386_PC16) { + // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are + // being used for some 16-bit programs such as boot loaders, so + // we want to support them. 
+ switch (Type) { + case R_386_8: + checkUInt<8>(Loc, Val, Type); + *Loc = Val; + break; + case R_386_PC8: + checkInt<8>(Loc, Val, Type); + *Loc = Val; + break; + case R_386_16: + checkUInt<16>(Loc, Val, Type); write16le(Loc, Val); - return; + break; + case R_386_PC16: + checkInt<16>(Loc, Val, Type); + write16le(Loc, Val); + break; + default: + checkInt<32>(Loc, Val, Type); + write32le(Loc, Val); } - write32le(Loc, Val); } void X86TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, @@ -527,7 +578,7 @@ void X86TargetInfo::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, 0x81, 0xe8, 0x00, 0x00, 0x00, 0x00 // subl 0(%ebx), %eax }; memcpy(Loc - 3, Inst, sizeof(Inst)); - relocateOne(Loc + 5, R_386_32, Val); + write32le(Loc + 5, Val); } void X86TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, @@ -543,7 +594,7 @@ void X86TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, 0x03, 0x83, 0x00, 0x00, 0x00, 0x00 // addl 0(%ebx), %eax }; memcpy(Loc - 3, Inst, sizeof(Inst)); - relocateOne(Loc + 5, R_386_32, Val); + write32le(Loc + 5, Val); } // In some conditions, relocations can be optimized to avoid using GOT. @@ -583,13 +634,13 @@ void X86TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, Loc[-1] = 0x80 | (Reg << 3) | Reg; } } - relocateOne(Loc, R_386_TLS_LE, Val); + write32le(Loc, Val); } void X86TargetInfo::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const { if (Type == R_386_TLS_LDO_32) { - relocateOne(Loc, R_386_TLS_LE, Val); + write32le(Loc, Val); return; } @@ -625,12 +676,16 @@ template <class ELFT> X86_64TargetInfo<ELFT>::X86_64TargetInfo() { // Align to the large page size (known as a superpage or huge page). // FreeBSD automatically promotes large, superpage-aligned allocations. DefaultImageBase = 0x200000; + // 0xCC is the "int3" (call debug exception handler) instruction. 
+ TrapInstr = 0xcccccccc; } template <class ELFT> -RelExpr X86_64TargetInfo<ELFT>::getRelExpr(uint32_t Type, - const SymbolBody &S) const { +RelExpr X86_64TargetInfo<ELFT>::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { switch (Type) { + case R_X86_64_8: + case R_X86_64_16: case R_X86_64_32: case R_X86_64_32S: case R_X86_64_64: @@ -660,10 +715,9 @@ RelExpr X86_64TargetInfo<ELFT>::getRelExpr(uint32_t Type, case R_X86_64_GOTTPOFF: return R_GOT_PC; case R_X86_64_NONE: - return R_HINT; + return R_NONE; default: - error("do not know how to handle relocation '" + toString(Type) + "' (" + - Twine(Type) + ")"); + error(toString(S.File) + ": unknown relocation type: " + toString(Type)); return R_HINT; } } @@ -681,25 +735,25 @@ template <class ELFT> void X86_64TargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &S) const { // See comments in X86TargetInfo::writeGotPlt. - write32le(Buf, S.getPltVA<ELFT>() + 6); + write32le(Buf, S.getPltVA() + 6); } template <class ELFT> void X86_64TargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { const uint8_t PltData[] = { - 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOT+8(%rip) - 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOT+16(%rip) - 0x0f, 0x1f, 0x40, 0x00 // nopl 0x0(rax) + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOTPLT+8(%rip) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOTPLT+16(%rip) + 0x0f, 0x1f, 0x40, 0x00 // nop }; memcpy(Buf, PltData, sizeof(PltData)); - uint64_t Got = In<ELFT>::GotPlt->getVA(); + uint64_t GotPlt = In<ELFT>::GotPlt->getVA(); uint64_t Plt = In<ELFT>::Plt->getVA(); - write32le(Buf + 2, Got - Plt + 2); // GOT+8 - write32le(Buf + 8, Got - Plt + 4); // GOT+16 + write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8 + write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16 } template <class ELFT> -void X86_64TargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, +void X86_64TargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, 
int32_t Index, unsigned RelOff) const { const uint8_t Inst[] = { @@ -709,7 +763,7 @@ void X86_64TargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, }; memcpy(Buf, Inst, sizeof(Inst)); - write32le(Buf + 2, GotEntryAddr - PltEntryAddr - 6); + write32le(Buf + 2, GotPltEntryAddr - PltEntryAddr - 6); write32le(Buf + 7, Index); write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16); } @@ -725,11 +779,6 @@ bool X86_64TargetInfo<ELFT>::isTlsInitialExecRel(uint32_t Type) const { } template <class ELFT> -bool X86_64TargetInfo<ELFT>::isTlsGlobalDynamicRel(uint32_t Type) const { - return Type == R_X86_64_TLSGD; -} - -template <class ELFT> bool X86_64TargetInfo<ELFT>::isTlsLocalDynamicRel(uint32_t Type) const { return Type == R_X86_64_DTPOFF32 || Type == R_X86_64_DTPOFF64 || Type == R_X86_64_TLSLD; @@ -752,9 +801,10 @@ void X86_64TargetInfo<ELFT>::relaxTlsGdToLe(uint8_t *Loc, uint32_t Type, 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff,%rax }; memcpy(Loc - 4, Inst, sizeof(Inst)); + // The original code used a pc relative relocation and so we have to // compensate for the -4 in had in the addend. - relocateOne(Loc + 8, R_X86_64_TPOFF32, Val + 4); + write32le(Loc + 8, Val + 4); } template <class ELFT> @@ -774,9 +824,10 @@ void X86_64TargetInfo<ELFT>::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // addq x@tpoff,%rax }; memcpy(Loc - 4, Inst, sizeof(Inst)); + // Both code sequences are PC relatives, but since we are moving the constant // forward by 8 bytes we have to subtract the value by 8. - relocateOne(Loc + 8, R_X86_64_PC32, Val - 8); + write32le(Loc + 8, Val - 8); } // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to @@ -821,7 +872,7 @@ void X86_64TargetInfo<ELFT>::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, // The original code used a PC relative relocation. // Need to compensate for the -4 it had in the addend. 
- relocateOne(Loc, R_X86_64_TPOFF32, Val + 4); + write32le(Loc, Val + 4); } template <class ELFT> @@ -841,7 +892,7 @@ void X86_64TargetInfo<ELFT>::relaxTlsLdToLe(uint8_t *Loc, uint32_t Type, return; } if (Type == R_X86_64_DTPOFF32) { - relocateOne(Loc, R_X86_64_TPOFF32, Val); + write32le(Loc, Val); return; } @@ -857,6 +908,14 @@ template <class ELFT> void X86_64TargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { switch (Type) { + case R_X86_64_8: + checkUInt<8>(Loc, Val, Type); + *Loc = Val; + break; + case R_X86_64_16: + checkUInt<16>(Loc, Val, Type); + write16le(Loc, Val); + break; case R_X86_64_32: checkUInt<32>(Loc, Val, Type); write32le(Loc, Val); @@ -898,12 +957,14 @@ RelExpr X86_64TargetInfo<ELFT>::adjustRelaxExpr(uint32_t Type, return RelExpr; const uint8_t Op = Data[-2]; const uint8_t ModRm = Data[-1]; + // FIXME: When PIC is disabled and foo is defined locally in the // lower 32 bit address space, memory operand in mov can be converted into // immediate operand. Otherwise, mov must be changed to lea. We support only // latter relaxation at this moment. if (Op == 0x8b) return R_RELAX_GOT_PC; + // Relax call and jmp. if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25)) return R_RELAX_GOT_PC; @@ -961,7 +1022,7 @@ void X86_64TargetInfo<ELFT>::relaxGotNoPic(uint8_t *Loc, uint64_t Val, // SIB.base field. // See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A). Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2; - relocateOne(Loc, R_X86_64_PC32, Val); + write32le(Loc, Val); return; } @@ -982,7 +1043,7 @@ void X86_64TargetInfo<ELFT>::relaxGotNoPic(uint8_t *Loc, uint64_t Val, // descriptions about each operation. Loc[-2] = 0x81; Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2; - relocateOne(Loc, R_X86_64_PC32, Val); + write32le(Loc, Val); } template <class ELFT> @@ -993,7 +1054,7 @@ void X86_64TargetInfo<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const { // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg". 
if (Op == 0x8b) { Loc[-2] = 0x8d; - relocateOne(Loc, R_X86_64_PC32, Val); + write32le(Loc, Val); return; } @@ -1012,7 +1073,7 @@ void X86_64TargetInfo<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const { // prefix. That makes result expression to be a single instruction. Loc[-2] = 0x67; // addr32 prefix Loc[-1] = 0xe8; // call - relocateOne(Loc, R_X86_64_PC32, Val); + write32le(Loc, Val); return; } @@ -1021,7 +1082,7 @@ void X86_64TargetInfo<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const { assert(ModRm == 0x25); Loc[-2] = 0xe9; // jmp Loc[3] = 0x90; // nop - relocateOne(Loc - 1, R_X86_64_PC32, Val + 1); + write32le(Loc - 1, Val + 1); } // Relocation masks following the #lo(value), #hi(value), #ha(value), @@ -1059,7 +1120,8 @@ void PPCTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, } } -RelExpr PPCTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { +RelExpr PPCTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { switch (Type) { case R_PPC_REL24: case R_PPC_REL32: @@ -1108,7 +1170,8 @@ uint64_t getPPC64TocBase() { return TocVA + PPC64TocOffset; } -RelExpr PPC64TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { +RelExpr PPC64TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { switch (Type) { default: return R_ABS; @@ -1126,10 +1189,10 @@ RelExpr PPC64TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { } } -void PPC64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, +void PPC64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { - uint64_t Off = GotEntryAddr - getPPC64TocBase(); + uint64_t Off = GotPltEntryAddr - getPPC64TocBase(); // FIXME: What we should do, in theory, is get the offset of the function // descriptor in the .opd section, and use that as the offset from %r2 (the @@ -1256,8 +1319,8 @@ AArch64TargetInfo::AArch64TargetInfo() { TcbSize = 16; } -RelExpr 
AArch64TargetInfo::getRelExpr(uint32_t Type, - const SymbolBody &S) const { +RelExpr AArch64TargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { switch (Type) { default: return R_ABS; @@ -1289,6 +1352,8 @@ RelExpr AArch64TargetInfo::getRelExpr(uint32_t Type, case R_AARCH64_ADR_GOT_PAGE: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: return R_GOT_PAGE_PC; + case R_AARCH64_NONE: + return R_NONE; } } @@ -1361,7 +1426,7 @@ void AArch64TargetInfo::writePltHeader(uint8_t *Buf) const { relocateOne(Buf + 12, R_AARCH64_ADD_ABS_LO12_NC, Got + 16); } -void AArch64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, +void AArch64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const uint8_t Inst[] = { @@ -1373,9 +1438,9 @@ void AArch64TargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, memcpy(Buf, Inst, sizeof(Inst)); relocateOne(Buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(GotEntryAddr) - getAArch64Page(PltEntryAddr)); - relocateOne(Buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, GotEntryAddr); - relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotEntryAddr); + getAArch64Page(GotPltEntryAddr) - getAArch64Page(PltEntryAddr)); + relocateOne(Buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, GotPltEntryAddr); + relocateOne(Buf + 8, R_AARCH64_ADD_ABS_LO12_NC, GotPltEntryAddr); } static void write32AArch64Addr(uint8_t *L, uint64_t Imm) { @@ -1598,7 +1663,8 @@ void AMDGPUTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, } } -RelExpr AMDGPUTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { +RelExpr AMDGPUTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { switch (Type) { case R_AMDGPU_ABS32: case R_AMDGPU_ABS64: @@ -1612,7 +1678,8 @@ RelExpr AMDGPUTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { case R_AMDGPU_GOTPCREL32_HI: return R_GOT_PC; default: - fatal("do not know how to handle relocation " + Twine(Type)); 
+ error(toString(S.File) + ": unknown relocation type: " + toString(Type)); + return R_HINT; } } @@ -1634,7 +1701,8 @@ ARMTargetInfo::ARMTargetInfo() { NeedsThunks = true; } -RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { +RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { switch (Type) { default: return R_ABS; @@ -1683,7 +1751,7 @@ RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { case R_ARM_THM_MOVT_PREL: return R_PC; case R_ARM_NONE: - return R_HINT; + return R_NONE; case R_ARM_TLS_LE32: return R_TLS; } @@ -1709,7 +1777,7 @@ void ARMTargetInfo::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { void ARMTargetInfo::writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const { // An ARM entry is the address of the ifunc resolver function. - write32le(Buf, S.getVA<ELF32LE>()); + write32le(Buf, S.getVA()); } void ARMTargetInfo::writePltHeader(uint8_t *Buf) const { @@ -1726,7 +1794,13 @@ void ARMTargetInfo::writePltHeader(uint8_t *Buf) const { write32le(Buf + 16, GotPlt - L1 - 8); } -void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, +void ARMTargetInfo::addPltHeaderSymbols(InputSectionBase *ISD) const { + auto *IS = cast<InputSection>(ISD); + addSyntheticLocal<ELF32LE>("$a", STT_NOTYPE, 0, 0, IS); + addSyntheticLocal<ELF32LE>("$d", STT_NOTYPE, 16, 0, IS); +} + +void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { // FIXME: Using simple code sequence with simple relocations. 
@@ -1740,18 +1814,24 @@ void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, }; memcpy(Buf, PltData, sizeof(PltData)); uint64_t L1 = PltEntryAddr + 4; - write32le(Buf + 12, GotEntryAddr - L1 - 8); + write32le(Buf + 12, GotPltEntryAddr - L1 - 8); +} + +void ARMTargetInfo::addPltSymbols(InputSectionBase *ISD, uint64_t Off) const { + auto *IS = cast<InputSection>(ISD); + addSyntheticLocal<ELF32LE>("$a", STT_NOTYPE, Off, 0, IS); + addSyntheticLocal<ELF32LE>("$d", STT_NOTYPE, Off + 12, 0, IS); } -RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, - const InputFile &File, - const SymbolBody &S) const { +bool ARMTargetInfo::needsThunk(RelExpr Expr, uint32_t RelocType, + const InputFile *File, + const SymbolBody &S) const { // If S is an undefined weak symbol in an executable we don't need a Thunk. // In a DSO calls to undefined symbols, including weak ones get PLT entries // which may need a thunk. - if (S.isUndefined() && !S.isLocal() && S.symbol()->isWeak() - && !Config->Shared) - return Expr; + if (S.isUndefined() && !S.isLocal() && S.symbol()->isWeak() && + !Config->Shared) + return false; // A state change from ARM to Thumb and vice versa must go through an // interworking thunk if the relocation type is not R_ARM_CALL or // R_ARM_THM_CALL. @@ -1761,20 +1841,18 @@ RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). - if (Expr == R_PC && ((S.getVA<ELF32LE>() & 1) == 1)) - return R_THUNK_PC; + if (Expr == R_PC && ((S.getVA() & 1) == 1)) + return true; break; case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: // Source is Thumb, all PLT entries are ARM so interworking is required. // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). 
- if (Expr == R_PLT_PC) - return R_THUNK_PLT_PC; - if ((S.getVA<ELF32LE>() & 1) == 0) - return R_THUNK_PC; + if (Expr == R_PLT_PC || ((S.getVA() & 1) == 0)) + return true; break; } - return Expr; + return false; } void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, @@ -1796,6 +1874,7 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, case R_ARM_TLS_LDO32: case R_ARM_TLS_LE32: case R_ARM_TLS_TPOFF32: + case R_ARM_TLS_DTPOFF32: write32le(Loc, Val); break; case R_ARM_TLS_DTPMOD32: @@ -1911,8 +1990,8 @@ void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, } } -uint64_t ARMTargetInfo::getImplicitAddend(const uint8_t *Buf, - uint32_t Type) const { +int64_t ARMTargetInfo::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { switch (Type) { default: return 0; @@ -1990,10 +2069,6 @@ bool ARMTargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { return Type == R_ARM_TLS_LDO32 || Type == R_ARM_TLS_LDM32; } -bool ARMTargetInfo::isTlsGlobalDynamicRel(uint32_t Type) const { - return Type == R_ARM_TLS_GD32; -} - bool ARMTargetInfo::isTlsInitialExecRel(uint32_t Type) const { return Type == R_ARM_TLS_IE32; } @@ -2022,8 +2097,8 @@ template <class ELFT> MipsTargetInfo<ELFT>::MipsTargetInfo() { } template <class ELFT> -RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, - const SymbolBody &S) const { +RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const { // See comment in the calculateMipsRelChain. if (ELFT::Is64Bits || Config->MipsN32Abi) Type &= 0xff; @@ -2039,13 +2114,16 @@ RelExpr MipsTargetInfo<ELFT>::getRelExpr(uint32_t Type, return R_PLT; case R_MIPS_HI16: case R_MIPS_LO16: - case R_MIPS_GOT_OFST: // R_MIPS_HI16/R_MIPS_LO16 relocations against _gp_disp calculate // offset between start of function and 'gp' value which by default // equal to the start of .got section. In that case we consider these // relocations as relative. 
- if (&S == ElfSym<ELFT>::MipsGpDisp) - return R_PC; + if (&S == ElfSym::MipsGpDisp) + return R_MIPS_GOT_GP_PC; + if (&S == ElfSym::MipsLocalGp) + return R_MIPS_GOT_GP; + // fallthrough + case R_MIPS_GOT_OFST: return R_ABS; case R_MIPS_PC32: case R_MIPS_PC16: @@ -2092,11 +2170,6 @@ bool MipsTargetInfo<ELFT>::isTlsLocalDynamicRel(uint32_t Type) const { } template <class ELFT> -bool MipsTargetInfo<ELFT>::isTlsGlobalDynamicRel(uint32_t Type) const { - return Type == R_MIPS_TLS_GD; -} - -template <class ELFT> void MipsTargetInfo<ELFT>::writeGotPlt(uint8_t *Buf, const SymbolBody &) const { write32<ELFT::TargetEndianness>(Buf, In<ELFT>::Plt->getVA()); } @@ -2161,18 +2234,20 @@ void MipsTargetInfo<ELFT>::writePltHeader(uint8_t *Buf) const { write32<E>(Buf + 8, 0x279c0000); // addiu $28, $28, %lo(&GOTPLT[0]) write32<E>(Buf + 12, 0x031cc023); // subu $24, $24, $28 } + write32<E>(Buf + 16, 0x03e07825); // move $15, $31 write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2 write32<E>(Buf + 24, 0x0320f809); // jalr $25 write32<E>(Buf + 28, 0x2718fffe); // subu $24, $24, 2 - uint64_t Got = In<ELFT>::GotPlt->getVA(); - writeMipsHi16<E>(Buf, Got); - writeMipsLo16<E>(Buf + 4, Got); - writeMipsLo16<E>(Buf + 8, Got); + + uint64_t GotPlt = In<ELFT>::GotPlt->getVA(); + writeMipsHi16<E>(Buf, GotPlt); + writeMipsLo16<E>(Buf + 4, GotPlt); + writeMipsLo16<E>(Buf + 8, GotPlt); } template <class ELFT> -void MipsTargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, +void MipsTargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const { const endianness E = ELFT::TargetEndianness; @@ -2181,37 +2256,37 @@ void MipsTargetInfo<ELFT>::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, // jr $25 write32<E>(Buf + 8, isMipsR6<ELFT>() ? 
0x03200009 : 0x03200008); write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry) - writeMipsHi16<E>(Buf, GotEntryAddr); - writeMipsLo16<E>(Buf + 4, GotEntryAddr); - writeMipsLo16<E>(Buf + 12, GotEntryAddr); + writeMipsHi16<E>(Buf, GotPltEntryAddr); + writeMipsLo16<E>(Buf + 4, GotPltEntryAddr); + writeMipsLo16<E>(Buf + 12, GotPltEntryAddr); } template <class ELFT> -RelExpr MipsTargetInfo<ELFT>::getThunkExpr(RelExpr Expr, uint32_t Type, - const InputFile &File, - const SymbolBody &S) const { +bool MipsTargetInfo<ELFT>::needsThunk(RelExpr Expr, uint32_t Type, + const InputFile *File, + const SymbolBody &S) const { // Any MIPS PIC code function is invoked with its address in register $t9. // So if we have a branch instruction from non-PIC code to the PIC one // we cannot make the jump directly and need to create a small stubs // to save the target function address. // See page 3-38 ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf if (Type != R_MIPS_26) - return Expr; - auto *F = dyn_cast<ELFFileBase<ELFT>>(&File); + return false; + auto *F = dyn_cast_or_null<ELFFileBase<ELFT>>(File); if (!F) - return Expr; + return false; // If current file has PIC code, LA25 stub is not required. if (F->getObj().getHeader()->e_flags & EF_MIPS_PIC) - return Expr; - auto *D = dyn_cast<DefinedRegular<ELFT>>(&S); + return false; + auto *D = dyn_cast<DefinedRegular>(&S); // LA25 is required if target file has PIC code // or target symbol is a PIC symbol. - return D && D->isMipsPIC() ? 
R_THUNK_ABS : Expr; + return D && D->isMipsPIC<ELFT>(); } template <class ELFT> -uint64_t MipsTargetInfo<ELFT>::getImplicitAddend(const uint8_t *Buf, - uint32_t Type) const { +int64_t MipsTargetInfo<ELFT>::getImplicitAddend(const uint8_t *Buf, + uint32_t Type) const { const endianness E = ELFT::TargetEndianness; switch (Type) { default: @@ -2220,7 +2295,7 @@ uint64_t MipsTargetInfo<ELFT>::getImplicitAddend(const uint8_t *Buf, case R_MIPS_GPREL32: case R_MIPS_TLS_DTPREL32: case R_MIPS_TLS_TPREL32: - return read32<E>(Buf); + return SignExtend64<32>(read32<E>(Buf)); case R_MIPS_26: // FIXME (simon): If the relocation target symbol is not a PLT entry // we should use another expression for calculation: @@ -2303,9 +2378,19 @@ void MipsTargetInfo<ELFT>::relocateOne(uint8_t *Loc, uint32_t Type, case R_MIPS_26: write32<E>(Loc, (read32<E>(Loc) & ~0x3ffffff) | ((Val >> 2) & 0x3ffffff)); break; + case R_MIPS_GOT16: + // The R_MIPS_GOT16 relocation's value in "relocatable" linking mode + // is updated addend (not a GOT index). In that case write high 16 bits + // to store a correct addend value. 
+ if (Config->Relocatable) + writeMipsHi16<E>(Loc, Val); + else { + checkInt<16>(Loc, Val, Type); + writeMipsLo16<E>(Loc, Val); + } + break; case R_MIPS_GOT_DISP: case R_MIPS_GOT_PAGE: - case R_MIPS_GOT16: case R_MIPS_GPREL16: case R_MIPS_TLS_GD: case R_MIPS_TLS_LDM: diff --git a/ELF/Target.h b/ELF/Target.h index 752f9cd5ee4e..4b88626050b3 100644 --- a/ELF/Target.h +++ b/ELF/Target.h @@ -25,13 +25,12 @@ class TargetInfo { public: virtual bool isTlsInitialExecRel(uint32_t Type) const; virtual bool isTlsLocalDynamicRel(uint32_t Type) const; - virtual bool isTlsGlobalDynamicRel(uint32_t Type) const; virtual bool isPicRel(uint32_t Type) const { return true; } virtual uint32_t getDynRel(uint32_t Type) const { return Type; } virtual void writeGotPltHeader(uint8_t *Buf) const {} virtual void writeGotPlt(uint8_t *Buf, const SymbolBody &S) const {}; virtual void writeIgotPlt(uint8_t *Buf, const SymbolBody &S) const; - virtual uint64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const; + virtual int64_t getImplicitAddend(const uint8_t *Buf, uint32_t Type) const; // If lazy binding is supported, the first entry of the PLT has code // to call the dynamic linker to resolve PLT entries the first time @@ -41,7 +40,8 @@ public: virtual void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t PltEntryAddr, int32_t Index, unsigned RelOff) const {} - + virtual void addPltHeaderSymbols(InputSectionBase *IS) const {} + virtual void addPltSymbols(InputSectionBase *IS, uint64_t Off) const {} // Returns true if a relocation only uses the low bits of a value such that // all those bits are in in the same page. For example, if the relocation // only uses the low 12 bits in a system with 4k pages. If this is true, the @@ -50,15 +50,11 @@ public: virtual bool usesOnlyLowPageBits(uint32_t Type) const; // Decide whether a Thunk is needed for the relocation from File - // targeting S. 
Returns one of: - // Expr if there is no Thunk required - // R_THUNK_ABS if thunk is required and expression is absolute - // R_THUNK_PC if thunk is required and expression is pc rel - // R_THUNK_PLT_PC if thunk is required to PLT entry and expression is pc rel - virtual RelExpr getThunkExpr(RelExpr Expr, uint32_t RelocType, - const InputFile &File, - const SymbolBody &S) const; - virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const = 0; + // targeting S. + virtual bool needsThunk(RelExpr Expr, uint32_t RelocType, + const InputFile *File, const SymbolBody &S) const; + virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S, + const uint8_t *Loc) const = 0; virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0; virtual ~TargetInfo(); @@ -95,6 +91,10 @@ public: bool NeedsThunks = false; + // A 4-byte field corresponding to one or more trap instructions, used to pad + // executable OutputSections. + uint32_t TrapInstr = 0; + virtual RelExpr adjustRelaxExpr(uint32_t Type, const uint8_t *Data, RelExpr Expr) const; virtual void relaxGot(uint8_t *Loc, uint64_t Val) const; diff --git a/ELF/Threads.h b/ELF/Threads.h index c03e15253e15..897432e69f8e 100644 --- a/ELF/Threads.h +++ b/ELF/Threads.h @@ -15,7 +15,7 @@ // // That said, we don't want to do "too clever" things using threads. // Complex multi-threaded algorithms are sometimes extremely hard to -// justify the correctness and can easily mess up the entire design. +// reason about and can easily mess up the entire design. // // Fortunately, when a linker links large programs (when the link time is // most critical), it spends most of the time to work on massive number of @@ -34,7 +34,7 @@ // instead of std::for_each (or a plain for loop). Because tasks are // completely independent from each other, we can run them in parallel // without any coordination between them. That's very easy to understand -// and justify. +// and reason about. 
// // For the cases such as the latter, we can use parallel algorithms to // deal with massive data. We have to write code for a tailored algorithm @@ -69,14 +69,15 @@ namespace lld { namespace elf { template <class IterTy, class FuncTy> -void forEach(IterTy Begin, IterTy End, FuncTy Fn) { +void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) { if (Config->Threads) parallel_for_each(Begin, End, Fn); else std::for_each(Begin, End, Fn); } -inline void forLoop(size_t Begin, size_t End, std::function<void(size_t)> Fn) { +inline void parallelFor(size_t Begin, size_t End, + std::function<void(size_t)> Fn) { if (Config->Threads) { parallel_for(Begin, End, Fn); } else { diff --git a/ELF/Thunks.cpp b/ELF/Thunks.cpp index 397a0ee66319..307ca5df2288 100644 --- a/ELF/Thunks.cpp +++ b/ELF/Thunks.cpp @@ -28,6 +28,7 @@ #include "Memory.h" #include "OutputSections.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "Target.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" @@ -49,127 +50,159 @@ namespace { // Specific ARM Thunk implementations. 
The naming convention is: // Source State, TargetState, Target Requirement, ABS or PI, Range -template <class ELFT> -class ARMToThumbV7ABSLongThunk final : public Thunk<ELFT> { +template <class ELFT> class ARMV7ABSLongThunk final : public Thunk { public: - ARMToThumbV7ABSLongThunk(const SymbolBody &Dest, - const InputSection<ELFT> &Owner) - : Thunk<ELFT>(Dest, Owner) {} + ARMV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) {} uint32_t size() const override { return 12; } - void writeTo(uint8_t *Buf) const override; + void writeTo(uint8_t *Buf, ThunkSection &IS) const override; + void addSymbols(ThunkSection &IS) override; }; -template <class ELFT> class ARMToThumbV7PILongThunk final : public Thunk<ELFT> { +template <class ELFT> class ARMV7PILongThunk final : public Thunk { public: - ARMToThumbV7PILongThunk(const SymbolBody &Dest, - const InputSection<ELFT> &Owner) - : Thunk<ELFT>(Dest, Owner) {} + ARMV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) {} uint32_t size() const override { return 16; } - void writeTo(uint8_t *Buf) const override; + void writeTo(uint8_t *Buf, ThunkSection &IS) const override; + void addSymbols(ThunkSection &IS) override; }; -template <class ELFT> -class ThumbToARMV7ABSLongThunk final : public Thunk<ELFT> { +template <class ELFT> class ThumbV7ABSLongThunk final : public Thunk { public: - ThumbToARMV7ABSLongThunk(const SymbolBody &Dest, - const InputSection<ELFT> &Owner) - : Thunk<ELFT>(Dest, Owner) {} + ThumbV7ABSLongThunk(const SymbolBody &Dest) : Thunk(Dest) { + this->alignment = 2; + } uint32_t size() const override { return 10; } - void writeTo(uint8_t *Buf) const override; + void writeTo(uint8_t *Buf, ThunkSection &IS) const override; + void addSymbols(ThunkSection &IS) override; }; -template <class ELFT> class ThumbToARMV7PILongThunk final : public Thunk<ELFT> { +template <class ELFT> class ThumbV7PILongThunk final : public Thunk { public: - ThumbToARMV7PILongThunk(const SymbolBody &Dest, - const InputSection<ELFT> &Owner) - : 
Thunk<ELFT>(Dest, Owner) {} + ThumbV7PILongThunk(const SymbolBody &Dest) : Thunk(Dest) { + this->alignment = 2; + } uint32_t size() const override { return 12; } - void writeTo(uint8_t *Buf) const override; + void writeTo(uint8_t *Buf, ThunkSection &IS) const override; + void addSymbols(ThunkSection &IS) override; }; // MIPS LA25 thunk -template <class ELFT> class MipsThunk final : public Thunk<ELFT> { +template <class ELFT> class MipsThunk final : public Thunk { public: - MipsThunk(const SymbolBody &Dest, const InputSection<ELFT> &Owner) - : Thunk<ELFT>(Dest, Owner) {} + MipsThunk(const SymbolBody &Dest) : Thunk(Dest) {} uint32_t size() const override { return 16; } - void writeTo(uint8_t *Buf) const override; + void writeTo(uint8_t *Buf, ThunkSection &IS) const override; + void addSymbols(ThunkSection &IS) override; + InputSection *getTargetInputSection() const override; }; } // end anonymous namespace // ARM Target Thunks -template <class ELFT> static uint64_t getARMThunkDestVA(const SymbolBody &S) { - uint64_t V = S.isInPlt() ? S.getPltVA<ELFT>() : S.getVA<ELFT>(); +static uint64_t getARMThunkDestVA(const SymbolBody &S) { + uint64_t V = S.isInPlt() ? 
S.getPltVA() : S.getVA(); return SignExtend64<32>(V); } template <class ELFT> -void ARMToThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const { +void ARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const { const uint8_t Data[] = { 0x00, 0xc0, 0x00, 0xe3, // movw ip,:lower16:S 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S 0x1c, 0xff, 0x2f, 0xe1, // bx ip }; - uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + uint64_t S = getARMThunkDestVA(this->Destination); memcpy(Buf, Data, sizeof(Data)); Target->relocateOne(Buf, R_ARM_MOVW_ABS_NC, S); Target->relocateOne(Buf + 4, R_ARM_MOVT_ABS, S); } template <class ELFT> -void ThumbToARMV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf) const { +void ARMV7ABSLongThunk<ELFT>::addSymbols(ThunkSection &IS) { + this->ThunkSym = addSyntheticLocal<ELFT>( + Saver.save("__ARMv7ABSLongThunk_" + this->Destination.getName()), + STT_FUNC, this->Offset, size(), &IS); + addSyntheticLocal<ELFT>("$a", STT_NOTYPE, this->Offset, 0, &IS); +} + +template <class ELFT> +void ThumbV7ABSLongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const { const uint8_t Data[] = { 0x40, 0xf2, 0x00, 0x0c, // movw ip, :lower16:S 0xc0, 0xf2, 0x00, 0x0c, // movt ip, :upper16:S 0x60, 0x47, // bx ip }; - uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); + uint64_t S = getARMThunkDestVA(this->Destination); memcpy(Buf, Data, sizeof(Data)); Target->relocateOne(Buf, R_ARM_THM_MOVW_ABS_NC, S); Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_ABS, S); } template <class ELFT> -void ARMToThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const { +void ThumbV7ABSLongThunk<ELFT>::addSymbols(ThunkSection &IS) { + this->ThunkSym = addSyntheticLocal<ELFT>( + Saver.save("__Thumbv7ABSLongThunk_" + this->Destination.getName()), + STT_FUNC, this->Offset, size(), &IS); + addSyntheticLocal<ELFT>("$t", STT_NOTYPE, this->Offset, 0, &IS); +} + +template <class ELFT> +void ARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const { const uint8_t Data[] = 
{ 0xf0, 0xcf, 0x0f, 0xe3, // P: movw ip,:lower16:S - (P + (L1-P) +8) 0x00, 0xc0, 0x40, 0xe3, // movt ip,:upper16:S - (P + (L1-P+4) +8) 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc 0x1c, 0xff, 0x2f, 0xe1, // bx r12 }; - uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); - uint64_t P = this->getVA(); + uint64_t S = getARMThunkDestVA(this->Destination); + uint64_t P = this->ThunkSym->getVA(); memcpy(Buf, Data, sizeof(Data)); Target->relocateOne(Buf, R_ARM_MOVW_PREL_NC, S - P - 16); Target->relocateOne(Buf + 4, R_ARM_MOVT_PREL, S - P - 12); } template <class ELFT> -void ThumbToARMV7PILongThunk<ELFT>::writeTo(uint8_t *Buf) const { +void ARMV7PILongThunk<ELFT>::addSymbols(ThunkSection &IS) { + this->ThunkSym = addSyntheticLocal<ELFT>( + Saver.save("__ARMV7PILongThunk_" + this->Destination.getName()), STT_FUNC, + this->Offset, size(), &IS); + addSyntheticLocal<ELFT>("$a", STT_NOTYPE, this->Offset, 0, &IS); +} + +template <class ELFT> +void ThumbV7PILongThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &IS) const { const uint8_t Data[] = { 0x4f, 0xf6, 0xf4, 0x7c, // P: movw ip,:lower16:S - (P + (L1-P) + 4) 0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P+4) + 4) 0xfc, 0x44, // L1: add r12, pc 0x60, 0x47, // bx r12 }; - uint64_t S = getARMThunkDestVA<ELFT>(this->Destination); - uint64_t P = this->getVA(); + uint64_t S = getARMThunkDestVA(this->Destination); + uint64_t P = this->ThunkSym->getVA(); memcpy(Buf, Data, sizeof(Data)); Target->relocateOne(Buf, R_ARM_THM_MOVW_PREL_NC, S - P - 12); Target->relocateOne(Buf + 4, R_ARM_THM_MOVT_PREL, S - P - 8); } +template <class ELFT> +void ThumbV7PILongThunk<ELFT>::addSymbols(ThunkSection &IS) { + this->ThunkSym = addSyntheticLocal<ELFT>( + Saver.save("__ThumbV7PILongThunk_" + this->Destination.getName()), + STT_FUNC, this->Offset, size(), &IS); + addSyntheticLocal<ELFT>("$t", STT_NOTYPE, this->Offset, 0, &IS); +} + // Write MIPS LA25 thunk code to call PIC function from the non-PIC one. 
-template <class ELFT> void MipsThunk<ELFT>::writeTo(uint8_t *Buf) const { +template <class ELFT> +void MipsThunk<ELFT>::writeTo(uint8_t *Buf, ThunkSection &) const { const endianness E = ELFT::TargetEndianness; - uint64_t S = this->Destination.template getVA<ELFT>(); + uint64_t S = this->Destination.getVA(); write32<E>(Buf, 0x3c190000); // lui $25, %hi(func) write32<E>(Buf + 4, 0x08000000 | (S >> 2)); // j func write32<E>(Buf + 8, 0x27390000); // addiu $25, $25, %lo(func) @@ -178,20 +211,24 @@ template <class ELFT> void MipsThunk<ELFT>::writeTo(uint8_t *Buf) const { Target->relocateOne(Buf + 8, R_MIPS_LO16, S); } -template <class ELFT> -Thunk<ELFT>::Thunk(const SymbolBody &D, const InputSection<ELFT> &O) - : Destination(D), Owner(O), Offset(O.getThunkOff() + O.getThunksSize()) {} +template <class ELFT> void MipsThunk<ELFT>::addSymbols(ThunkSection &IS) { + this->ThunkSym = addSyntheticLocal<ELFT>( + Saver.save("__LA25Thunk_" + this->Destination.getName()), STT_FUNC, + this->Offset, size(), &IS); +} -template <class ELFT> typename ELFT::uint Thunk<ELFT>::getVA() const { - return Owner.OutSec->Addr + Owner.OutSecOff + Offset; +template <class ELFT> +InputSection *MipsThunk<ELFT>::getTargetInputSection() const { + auto *DR = dyn_cast<DefinedRegular>(&this->Destination); + return dyn_cast<InputSection>(DR->Section); } -template <class ELFT> Thunk<ELFT>::~Thunk() = default; +Thunk::Thunk(const SymbolBody &D) : Destination(D), Offset(0) {} + +Thunk::~Thunk() = default; // Creates a thunk for Thumb-ARM interworking. -template <class ELFT> -static Thunk<ELFT> *createThunkArm(uint32_t Reloc, SymbolBody &S, - InputSection<ELFT> &IS) { +template <class ELFT> static Thunk *addThunkArm(uint32_t Reloc, SymbolBody &S) { // ARM relocations need ARM to Thumb interworking Thunks. // Thumb relocations need Thumb to ARM relocations. // Use position independent Thunks if we require position independent code. 
@@ -200,76 +237,33 @@ static Thunk<ELFT> *createThunkArm(uint32_t Reloc, SymbolBody &S, case R_ARM_PLT32: case R_ARM_JUMP24: if (Config->Pic) - return new (BAlloc) ARMToThumbV7PILongThunk<ELFT>(S, IS); - return new (BAlloc) ARMToThumbV7ABSLongThunk<ELFT>(S, IS); + return make<ARMV7PILongThunk<ELFT>>(S); + return make<ARMV7ABSLongThunk<ELFT>>(S); case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: if (Config->Pic) - return new (BAlloc) ThumbToARMV7PILongThunk<ELFT>(S, IS); - return new (BAlloc) ThumbToARMV7ABSLongThunk<ELFT>(S, IS); + return make<ThumbV7PILongThunk<ELFT>>(S); + return make<ThumbV7ABSLongThunk<ELFT>>(S); } fatal("unrecognized relocation type"); } -template <class ELFT> -static void addThunkARM(uint32_t Reloc, SymbolBody &S, InputSection<ELFT> &IS) { - // Only one Thunk supported per symbol. - if (S.hasThunk<ELFT>()) - return; - - // ARM Thunks are added to the same InputSection as the relocation. This - // isn't strictly necessary but it makes it more likely that a limited range - // branch can reach the Thunk, and it makes Thunks to the PLT section easier - Thunk<ELFT> *T = createThunkArm(Reloc, S, IS); - IS.addThunk(T); - if (auto *Sym = dyn_cast<DefinedRegular<ELFT>>(&S)) - Sym->ThunkData = T; - else if (auto *Sym = dyn_cast<SharedSymbol<ELFT>>(&S)) - Sym->ThunkData = T; - else if (auto *Sym = dyn_cast<Undefined<ELFT>>(&S)) - Sym->ThunkData = T; - else - fatal("symbol not DefinedRegular or Shared"); -} - -template <class ELFT> -static void addThunkMips(uint32_t RelocType, SymbolBody &S, - InputSection<ELFT> &IS) { - // Only one Thunk supported per symbol. - if (S.hasThunk<ELFT>()) - return; - - // Mips Thunks are added to the InputSection defining S. 
- auto *R = cast<DefinedRegular<ELFT>>(&S); - auto *Sec = cast<InputSection<ELFT>>(R->Section); - auto *T = new (BAlloc) MipsThunk<ELFT>(S, *Sec); - Sec->addThunk(T); - R->ThunkData = T; +template <class ELFT> static Thunk *addThunkMips(SymbolBody &S) { + return make<MipsThunk<ELFT>>(S); } -template <class ELFT> -void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &IS) { +template <class ELFT> Thunk *addThunk(uint32_t RelocType, SymbolBody &S) { if (Config->EMachine == EM_ARM) - addThunkARM<ELFT>(RelocType, S, IS); + return addThunkArm<ELFT>(RelocType, S); else if (Config->EMachine == EM_MIPS) - addThunkMips<ELFT>(RelocType, S, IS); - else - llvm_unreachable("add Thunk only supported for ARM and Mips"); + return addThunkMips<ELFT>(S); + llvm_unreachable("add Thunk only supported for ARM and Mips"); + return nullptr; } -template void addThunk<ELF32LE>(uint32_t, SymbolBody &, - InputSection<ELF32LE> &); -template void addThunk<ELF32BE>(uint32_t, SymbolBody &, - InputSection<ELF32BE> &); -template void addThunk<ELF64LE>(uint32_t, SymbolBody &, - InputSection<ELF64LE> &); -template void addThunk<ELF64BE>(uint32_t, SymbolBody &, - InputSection<ELF64BE> &); - -template class Thunk<ELF32LE>; -template class Thunk<ELF32BE>; -template class Thunk<ELF64LE>; -template class Thunk<ELF64BE>; - +template Thunk *addThunk<ELF32LE>(uint32_t, SymbolBody &); +template Thunk *addThunk<ELF32BE>(uint32_t, SymbolBody &); +template Thunk *addThunk<ELF64LE>(uint32_t, SymbolBody &); +template Thunk *addThunk<ELF64BE>(uint32_t, SymbolBody &); } // end namespace elf } // end namespace lld diff --git a/ELF/Thunks.h b/ELF/Thunks.h index b937d7918491..a9f49279f3f2 100644 --- a/ELF/Thunks.h +++ b/ELF/Thunks.h @@ -15,8 +15,7 @@ namespace lld { namespace elf { class SymbolBody; -template <class ELFT> class InputSection; - +class ThunkSection; // Class to describe an instance of a Thunk. // A Thunk is a code-sequence inserted by the linker in between a caller and // the callee. 
The relocation to the callee is redirected to the Thunk, which @@ -24,31 +23,35 @@ template <class ELFT> class InputSection; // include transferring control from non-pi to pi and changing state on // targets like ARM. // -// Thunks can be created for DefinedRegular and Shared Symbols. The Thunk -// is stored in a field of the Symbol Destination. -// Thunks to be written to an InputSection are recorded by the InputSection. -template <class ELFT> class Thunk { - typedef typename ELFT::uint uintX_t; - +// Thunks can be created for DefinedRegular, Shared and Undefined Symbols. +// Thunks are assigned to synthetic ThunkSections +class Thunk { public: - Thunk(const SymbolBody &Destination, const InputSection<ELFT> &Owner); + Thunk(const SymbolBody &Destination); virtual ~Thunk(); virtual uint32_t size() const { return 0; } - virtual void writeTo(uint8_t *Buf) const {} - uintX_t getVA() const; + virtual void writeTo(uint8_t *Buf, ThunkSection &IS) const {} + + // All Thunks must define at least one symbol ThunkSym so that we can + // redirect relocations to it. + virtual void addSymbols(ThunkSection &IS) {} + + // Some Thunks must be placed immediately before their Target as they elide + // a branch and fall through to the first Symbol in the Target. + virtual InputSection *getTargetInputSection() const { return nullptr; } -protected: + // The alignment requirement for this Thunk, defaults to the size of the + // typical code section alignment. const SymbolBody &Destination; - const InputSection<ELFT> &Owner; + SymbolBody *ThunkSym; uint64_t Offset; + uint32_t alignment = 4; }; -// For a Relocation to symbol S from InputSection Src, create a Thunk and -// update the fields of S and the InputSection that the Thunk body will be -// written to. At present there are implementations for ARM and Mips Thunks. 
-template <class ELFT> -void addThunk(uint32_t RelocType, SymbolBody &S, InputSection<ELFT> &Src); +// For a Relocation to symbol S create a Thunk to be added to a synthetic +// ThunkSection. At present there are implementations for ARM and Mips Thunks. +template <class ELFT> Thunk *addThunk(uint32_t RelocType, SymbolBody &S); } // namespace elf } // namespace lld diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp index b004a4f0d7f7..3ded0c675b80 100644 --- a/ELF/Writer.cpp +++ b/ELF/Writer.cpp @@ -9,7 +9,9 @@ #include "Writer.h" #include "Config.h" +#include "Filesystem.h" #include "LinkerScript.h" +#include "MapFile.h" #include "Memory.h" #include "OutputSections.h" #include "Relocations.h" @@ -20,10 +22,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include <climits> -#include <thread> using namespace llvm; using namespace llvm::ELF; @@ -38,22 +38,19 @@ namespace { // The writer writes a SymbolTable result to a file. 
template <class ELFT> class Writer { public: - typedef typename ELFT::uint uintX_t; typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Ehdr Elf_Ehdr; typedef typename ELFT::Phdr Elf_Phdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::SymRange Elf_Sym_Range; - typedef typename ELFT::Rela Elf_Rela; + void run(); private: void createSyntheticSections(); void copyLocalSymbols(); + void addSectionSymbols(); void addReservedSymbols(); - void addInputSec(InputSectionBase<ELFT> *S); void createSections(); - void forEachRelSec(std::function<void(InputSectionBase<ELFT> &)> Fn); + void forEachRelSec(std::function<void(InputSectionBase &)> Fn); void sortSections(); void finalizeSections(); void addPredefinedSections(); @@ -67,7 +64,7 @@ private: void setPhdrs(); void fixHeaders(); void fixSectionAlignments(); - void fixAbsoluteSymbols(); + void fixPredefinedSymbols(); void openFile(); void writeHeader(); void writeSections(); @@ -76,19 +73,19 @@ private: std::unique_ptr<FileOutputBuffer> Buffer; - std::vector<OutputSectionBase *> OutputSections; - OutputSectionFactory<ELFT> Factory; + std::vector<OutputSection *> OutputSections; + OutputSectionFactory Factory{OutputSections}; void addRelIpltSymbols(); void addStartEndSymbols(); - void addStartStopSymbols(OutputSectionBase *Sec); - uintX_t getEntryAddr(); - OutputSectionBase *findSection(StringRef Name); + void addStartStopSymbols(OutputSection *Sec); + uint64_t getEntryAddr(); + OutputSection *findSection(StringRef Name); std::vector<PhdrEntry> Phdrs; - uintX_t FileSize; - uintX_t SectionHeaderOff; + uint64_t FileSize; + uint64_t SectionHeaderOff; bool AllocateHeader = true; }; } // anonymous namespace @@ -97,9 +94,21 @@ StringRef elf::getOutputSectionName(StringRef Name) { if (Config->Relocatable) return Name; + // If -emit-relocs is given (which is rare), we need to copy + // relocation sections to the output. 
If input section .foo is + // output as .bar, we want to rename .rel.foo .rel.bar as well. + if (Config->EmitRelocs) { + for (StringRef V : {".rel.", ".rela."}) { + if (Name.startswith(V)) { + StringRef Inner = getOutputSectionName(Name.substr(V.size() - 1)); + return Saver.save(Twine(V.drop_back()) + Inner); + } + } + } + for (StringRef V : - {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.", - ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", + {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", + ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", ".gcc_except_table.", ".tdata.", ".ARM.exidx."}) { StringRef Prefix = V.drop_back(); if (Name.startswith(V) || Name == Prefix) @@ -118,17 +127,9 @@ StringRef elf::getOutputSectionName(StringRef Name) { return Name; } -template <class ELFT> void elf::reportDiscarded(InputSectionBase<ELFT> *IS) { - if (!Config->PrintGcSections) - return; - errs() << "removing unused section from '" << IS->Name << "' in file '" - << IS->getFile()->getName() << "'\n"; -} - template <class ELFT> static bool needsInterpSection() { return !Symtab<ELFT>::X->getSharedFiles().empty() && - !Config->DynamicLinker.empty() && - !Script<ELFT>::X->ignoreInterpSection(); + !Config->DynamicLinker.empty() && !Script->ignoreInterpSection(); } template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); } @@ -139,49 +140,105 @@ template <class ELFT> void Writer<ELFT>::removeEmptyPTLoad() { return false; if (!P.First) return true; - uintX_t Size = P.Last->Addr + P.Last->Size - P.First->Addr; + uint64_t Size = P.Last->Addr + P.Last->Size - P.First->Addr; return Size == 0; }); Phdrs.erase(I, Phdrs.end()); } +// This function scans over the input sections and creates mergeable +// synthetic sections. It removes MergeInputSections from array and +// adds new synthetic ones. Each synthetic section is added to the +// location of the first input section it replaces. 
+static void combineMergableSections() { + std::vector<MergeSyntheticSection *> MergeSections; + for (InputSectionBase *&S : InputSections) { + MergeInputSection *MS = dyn_cast<MergeInputSection>(S); + if (!MS) + continue; + + // We do not want to handle sections that are not alive, so just remove + // them instead of trying to merge. + if (!MS->Live) + continue; + + StringRef OutsecName = getOutputSectionName(MS->Name); + uint64_t Flags = MS->Flags & ~(uint64_t)(SHF_GROUP | SHF_COMPRESSED); + uint32_t Alignment = std::max<uint32_t>(MS->Alignment, MS->Entsize); + + auto I = + llvm::find_if(MergeSections, [=](MergeSyntheticSection *Sec) { + return Sec->Name == OutsecName && Sec->Flags == Flags && + Sec->Alignment == Alignment; + }); + if (I == MergeSections.end()) { + MergeSyntheticSection *Syn = + make<MergeSyntheticSection>(OutsecName, MS->Type, Flags, Alignment); + MergeSections.push_back(Syn); + I = std::prev(MergeSections.end()); + S = Syn; + } else { + S = nullptr; + } + (*I)->addSection(MS); + } + + std::vector<InputSectionBase *> &V = InputSections; + V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); +} + +template <class ELFT> static void combineEhFrameSections() { + for (InputSectionBase *&S : InputSections) { + EhInputSection *ES = dyn_cast<EhInputSection>(S); + if (!ES || !ES->Live) + continue; + + In<ELFT>::EhFrame->addSection(ES); + S = nullptr; + } + + std::vector<InputSectionBase *> &V = InputSections; + V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); +} + // The main function of the writer. template <class ELFT> void Writer<ELFT>::run() { // Create linker-synthesized sections such as .got or .plt. // Such sections are of type input section. createSyntheticSections(); + combineMergableSections(); + + if (!Config->Relocatable) + combineEhFrameSections<ELFT>(); // We need to create some reserved symbols such as _end. Create them. 
if (!Config->Relocatable) addReservedSymbols(); - // Some architectures use small displacements for jump instructions. - // It is linker's responsibility to create thunks containing long - // jump instructions if jump targets are too far. Create thunks. - if (Target->NeedsThunks) - forEachRelSec(createThunks<ELFT>); - // Create output sections. - Script<ELFT>::X->OutputSections = &OutputSections; - if (ScriptConfig->HasSections) { + Script->OutputSections = &OutputSections; + if (Script->Opt.HasSections) { // If linker script contains SECTIONS commands, let it create sections. - Script<ELFT>::X->processCommands(Factory); + Script->processCommands(Factory); // Linker scripts may have left some input sections unassigned. // Assign such sections using the default rule. - Script<ELFT>::X->addOrphanSections(Factory); + Script->addOrphanSections(Factory); } else { // If linker script does not contain SECTIONS commands, create // output sections by default rules. We still need to give the // linker script a chance to run, because it might contain // non-SECTIONS commands such as ASSERT. createSections(); - Script<ELFT>::X->processCommands(Factory); + Script->processCommands(Factory); } if (Config->Discard != DiscardPolicy::All) copyLocalSymbols(); + if (Config->CopyRelocs) + addSectionSymbols(); + // Now that we have a complete set of output sections. This function // completes section contents. For example, we need to add strings // to the string table, and add entries to .got and .plt. 
@@ -193,11 +250,12 @@ template <class ELFT> void Writer<ELFT>::run() { if (Config->Relocatable) { assignFileOffsets(); } else { - if (ScriptConfig->HasSections) { - Script<ELFT>::X->assignAddresses(Phdrs); + if (Script->Opt.HasSections) { + Script->assignAddresses(Phdrs); } else { fixSectionAlignments(); assignAddresses(); + Script->processNonSectionCommands(); } // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a @@ -211,9 +269,12 @@ template <class ELFT> void Writer<ELFT>::run() { assignFileOffsetsBinary(); setPhdrs(); - fixAbsoluteSymbols(); + fixPredefinedSymbols(); } + // It does not make sense try to open the file if we have error already. + if (ErrorCount) + return; // Write the result down to a file. openFile(); if (ErrorCount) @@ -231,8 +292,13 @@ template <class ELFT> void Writer<ELFT>::run() { if (ErrorCount) return; + // Handle -Map option. + writeMapFile<ELFT>(OutputSections); + if (ErrorCount) + return; + if (auto EC = Buffer->commit()) - error(EC, "failed to write to the output file"); + error("failed to write to the output file: " + EC.message()); // Flush the output streams and exit immediately. A full shutdown // is a good test that we are keeping track of all allocated memory, @@ -241,156 +307,161 @@ template <class ELFT> void Writer<ELFT>::run() { exitLld(0); } -// Initialize Out<ELFT> members. +// Initialize Out members. template <class ELFT> void Writer<ELFT>::createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. - memset(&Out<ELFT>::First, 0, sizeof(Out<ELFT>)); - - // Create singleton output sections. 
- Out<ELFT>::Bss = - make<OutputSection<ELFT>>(".bss", SHT_NOBITS, SHF_ALLOC | SHF_WRITE); - Out<ELFT>::BssRelRo = make<OutputSection<ELFT>>(".bss.rel.ro", SHT_NOBITS, - SHF_ALLOC | SHF_WRITE); - In<ELFT>::DynStrTab = make<StringTableSection<ELFT>>(".dynstr", true); + memset(&Out::First, 0, sizeof(Out)); + + auto Add = [](InputSectionBase *Sec) { InputSections.push_back(Sec); }; + + In<ELFT>::DynStrTab = make<StringTableSection>(".dynstr", true); In<ELFT>::Dynamic = make<DynamicSection<ELFT>>(); - Out<ELFT>::EhFrame = make<EhOutputSection<ELFT>>(); In<ELFT>::RelaDyn = make<RelocationSection<ELFT>>( - Config->Rela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); - In<ELFT>::ShStrTab = make<StringTableSection<ELFT>>(".shstrtab", false); + Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); + In<ELFT>::ShStrTab = make<StringTableSection>(".shstrtab", false); - Out<ELFT>::ElfHeader = make<OutputSectionBase>("", 0, SHF_ALLOC); - Out<ELFT>::ElfHeader->Size = sizeof(Elf_Ehdr); - Out<ELFT>::ProgramHeaders = make<OutputSectionBase>("", 0, SHF_ALLOC); - Out<ELFT>::ProgramHeaders->updateAlignment(sizeof(uintX_t)); + Out::ElfHeader = make<OutputSection>("", 0, SHF_ALLOC); + Out::ElfHeader->Size = sizeof(Elf_Ehdr); + Out::ProgramHeaders = make<OutputSection>("", 0, SHF_ALLOC); + Out::ProgramHeaders->updateAlignment(Config->Wordsize); if (needsInterpSection<ELFT>()) { - In<ELFT>::Interp = createInterpSection<ELFT>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Interp); + In<ELFT>::Interp = createInterpSection(); + Add(In<ELFT>::Interp); } else { In<ELFT>::Interp = nullptr; } if (!Config->Relocatable) - Symtab<ELFT>::X->Sections.push_back(createCommentSection<ELFT>()); + Add(createCommentSection<ELFT>()); if (Config->Strip != StripPolicy::All) { - In<ELFT>::StrTab = make<StringTableSection<ELFT>>(".strtab", false); + In<ELFT>::StrTab = make<StringTableSection>(".strtab", false); In<ELFT>::SymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::StrTab); } if 
(Config->BuildId != BuildIdKind::None) { - In<ELFT>::BuildId = make<BuildIdSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::BuildId); + In<ELFT>::BuildId = make<BuildIdSection>(); + Add(In<ELFT>::BuildId); } - InputSection<ELFT> *Common = createCommonSection<ELFT>(); - if (!Common->Data.empty()) { - In<ELFT>::Common = Common; - Symtab<ELFT>::X->Sections.push_back(Common); - } + In<ELFT>::Common = createCommonSection<ELFT>(); + if (In<ELFT>::Common) + Add(InX::Common); + + In<ELFT>::Bss = make<BssSection>(".bss"); + Add(In<ELFT>::Bss); + In<ELFT>::BssRelRo = make<BssSection>(".bss.rel.ro"); + Add(In<ELFT>::BssRelRo); // Add MIPS-specific sections. - bool HasDynSymTab = !Symtab<ELFT>::X->getSharedFiles().empty() || Config->Pic; + bool HasDynSymTab = !Symtab<ELFT>::X->getSharedFiles().empty() || + Config->Pic || Config->ExportDynamic; if (Config->EMachine == EM_MIPS) { if (!Config->Shared && HasDynSymTab) { - In<ELFT>::MipsRldMap = make<MipsRldMapSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsRldMap); + In<ELFT>::MipsRldMap = make<MipsRldMapSection>(); + Add(In<ELFT>::MipsRldMap); } if (auto *Sec = MipsAbiFlagsSection<ELFT>::create()) - Symtab<ELFT>::X->Sections.push_back(Sec); + Add(Sec); if (auto *Sec = MipsOptionsSection<ELFT>::create()) - Symtab<ELFT>::X->Sections.push_back(Sec); + Add(Sec); if (auto *Sec = MipsReginfoSection<ELFT>::create()) - Symtab<ELFT>::X->Sections.push_back(Sec); + Add(Sec); } if (HasDynSymTab) { In<ELFT>::DynSymTab = make<SymbolTableSection<ELFT>>(*In<ELFT>::DynStrTab); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynSymTab); + Add(In<ELFT>::DynSymTab); In<ELFT>::VerSym = make<VersionTableSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerSym); + Add(In<ELFT>::VerSym); if (!Config->VersionDefinitions.empty()) { In<ELFT>::VerDef = make<VersionDefinitionSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerDef); + Add(In<ELFT>::VerDef); } In<ELFT>::VerNeed = 
make<VersionNeedSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::VerNeed); + Add(In<ELFT>::VerNeed); if (Config->GnuHash) { In<ELFT>::GnuHashTab = make<GnuHashTableSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GnuHashTab); + Add(In<ELFT>::GnuHashTab); } if (Config->SysvHash) { In<ELFT>::HashTab = make<HashTableSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::HashTab); + Add(In<ELFT>::HashTab); } - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Dynamic); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::DynStrTab); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaDyn); + Add(In<ELFT>::Dynamic); + Add(In<ELFT>::DynStrTab); + Add(In<ELFT>::RelaDyn); } // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (Config->EMachine == EM_MIPS) { - In<ELFT>::MipsGot = make<MipsGotSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::MipsGot); + In<ELFT>::MipsGot = make<MipsGotSection>(); + Add(In<ELFT>::MipsGot); } else { In<ELFT>::Got = make<GotSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Got); + Add(In<ELFT>::Got); } - In<ELFT>::GotPlt = make<GotPltSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GotPlt); - In<ELFT>::IgotPlt = make<IgotPltSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::IgotPlt); + In<ELFT>::GotPlt = make<GotPltSection>(); + Add(In<ELFT>::GotPlt); + In<ELFT>::IgotPlt = make<IgotPltSection>(); + Add(In<ELFT>::IgotPlt); if (Config->GdbIndex) { - In<ELFT>::GdbIndex = make<GdbIndexSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::GdbIndex); + In<ELFT>::GdbIndex = make<GdbIndexSection>(); + Add(In<ELFT>::GdbIndex); } // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. In<ELFT>::RelaPlt = make<RelocationSection<ELFT>>( - Config->Rela ? 
".rela.plt" : ".rel.plt", false /*Sort*/); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaPlt); + Config->IsRela ? ".rela.plt" : ".rel.plt", false /*Sort*/); + Add(In<ELFT>::RelaPlt); // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure // that the IRelative relocations are processed last by the dynamic loader In<ELFT>::RelaIplt = make<RelocationSection<ELFT>>( (Config->EMachine == EM_ARM) ? ".rel.dyn" : In<ELFT>::RelaPlt->Name, false /*Sort*/); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::RelaIplt); + Add(In<ELFT>::RelaIplt); - In<ELFT>::Plt = make<PltSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Plt); - In<ELFT>::Iplt = make<IpltSection<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::Iplt); + In<ELFT>::Plt = make<PltSection>(Target->PltHeaderSize); + Add(In<ELFT>::Plt); + In<ELFT>::Iplt = make<PltSection>(0); + Add(In<ELFT>::Iplt); - if (Config->EhFrameHdr) { - In<ELFT>::EhFrameHdr = make<EhFrameHeader<ELFT>>(); - Symtab<ELFT>::X->Sections.push_back(In<ELFT>::EhFrameHdr); + if (!Config->Relocatable) { + if (Config->EhFrameHdr) { + In<ELFT>::EhFrameHdr = make<EhFrameHeader<ELFT>>(); + Add(In<ELFT>::EhFrameHdr); + } + In<ELFT>::EhFrame = make<EhFrameSection<ELFT>>(); + Add(In<ELFT>::EhFrame); } + + if (In<ELFT>::SymTab) + Add(In<ELFT>::SymTab); + Add(In<ELFT>::ShStrTab); + if (In<ELFT>::StrTab) + Add(In<ELFT>::StrTab); } -template <class ELFT> -static bool shouldKeepInSymtab(InputSectionBase<ELFT> *Sec, StringRef SymName, +static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName, const SymbolBody &B) { - if (B.isFile()) + if (B.isFile() || B.isSection()) return false; - // We keep sections in symtab for relocatable output. - if (B.isSection()) - return Config->Relocatable; - // If sym references a section in a discarded group, don't keep it. 
- if (Sec == &InputSection<ELFT>::Discarded) + if (Sec == &InputSection::Discarded) return false; if (Config->Discard == DiscardPolicy::None) @@ -410,24 +481,23 @@ static bool shouldKeepInSymtab(InputSectionBase<ELFT> *Sec, StringRef SymName, return !Sec || !(Sec->Flags & SHF_MERGE); } -template <class ELFT> static bool includeInSymtab(const SymbolBody &B) { +static bool includeInSymtab(const SymbolBody &B) { if (!B.isLocal() && !B.symbol()->IsUsedInRegularObj) return false; - // If --retain-symbols-file is given, we'll keep only symbols listed in that - // file. - if (Config->Discard == DiscardPolicy::RetainFile && - !Config->RetainSymbolsFile.count(B.getName())) - return false; - - if (auto *D = dyn_cast<DefinedRegular<ELFT>>(&B)) { + if (auto *D = dyn_cast<DefinedRegular>(&B)) { // Always include absolute symbols. - if (!D->Section) + SectionBase *Sec = D->Section; + if (!Sec) return true; - // Exclude symbols pointing to garbage-collected sections. - if (!D->Section->Live) - return false; - if (auto *S = dyn_cast<MergeInputSection<ELFT>>(D->Section)) + if (auto *IS = dyn_cast<InputSectionBase>(Sec)) { + Sec = IS->Repl; + IS = cast<InputSectionBase>(Sec); + // Exclude symbols pointing to garbage-collected sections. + if (!IS->Live) + return false; + } + if (auto *S = dyn_cast<MergeInputSection>(Sec)) if (!S->getSectionPiece(D->Value)->Live) return false; } @@ -444,22 +514,41 @@ template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { if (!B->IsLocal) fatal(toString(F) + ": broken object: getLocalSymbols returns a non-local symbol"); - auto *DR = dyn_cast<DefinedRegular<ELFT>>(B); + auto *DR = dyn_cast<DefinedRegular>(B); // No reason to keep local undefined symbol in symtab. 
if (!DR) continue; - if (!includeInSymtab<ELFT>(*B)) + if (!includeInSymtab(*B)) continue; - InputSectionBase<ELFT> *Sec = DR->Section; - if (!shouldKeepInSymtab<ELFT>(Sec, B->getName(), *B)) + SectionBase *Sec = DR->Section; + if (!shouldKeepInSymtab(Sec, B->getName(), *B)) continue; - In<ELFT>::SymTab->addLocal(B); + In<ELFT>::SymTab->addSymbol(B); } } } +template <class ELFT> void Writer<ELFT>::addSectionSymbols() { + // Create one STT_SECTION symbol for each output section we might + // have a relocation with. + for (OutputSection *Sec : OutputSections) { + if (Sec->Sections.empty()) + continue; + + InputSection *IS = Sec->Sections[0]; + if (isa<SyntheticSection>(IS) || IS->Type == SHT_REL || + IS->Type == SHT_RELA) + continue; + + auto *Sym = + make<DefinedRegular>("", /*IsLocal=*/true, /*StOther=*/0, STT_SECTION, + /*Value=*/0, /*Size=*/0, IS, nullptr); + In<ELFT>::SymTab->addSymbol(Sym); + } +} + // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections that // we would like to make sure appear is a specific order to maximize their // coverage by a single signed 16-bit offset from the TOC base pointer. @@ -478,46 +567,94 @@ static int getPPC64SectionRank(StringRef SectionName) { // All sections with SHF_MIPS_GPREL flag should be grouped together // because data in these sections is addressable with a gp relative address. -static int getMipsSectionRank(const OutputSectionBase *S) { +static int getMipsSectionRank(const OutputSection *S) { if ((S->Flags & SHF_MIPS_GPREL) == 0) return 0; - if (S->getName() == ".got") + if (S->Name == ".got") return 1; return 2; } -template <class ELFT> bool elf::isRelroSection(const OutputSectionBase *Sec) { +// Today's loaders have a feature to make segments read-only after +// processing dynamic relocations to enhance security. PT_GNU_RELRO +// is defined for that. +// +// This function returns true if a section needs to be put into a +// PT_GNU_RELRO segment. 
+template <class ELFT> bool elf::isRelroSection(const OutputSection *Sec) { if (!Config->ZRelro) return false; + uint64_t Flags = Sec->Flags; + + // Non-allocatable or non-writable sections don't need RELRO because + // they are not writable or not even mapped to memory in the first place. + // RELRO is for sections that are essentially read-only but need to + // be writable only at process startup to allow dynamic linker to + // apply relocations. if (!(Flags & SHF_ALLOC) || !(Flags & SHF_WRITE)) return false; + + // Once initialized, TLS data segments are used as data templates + // for a thread-local storage. For each new thread, runtime + // allocates memory for a TLS and copy templates there. No thread + // are supposed to use templates directly. Thus, it can be in RELRO. if (Flags & SHF_TLS) return true; + + // .init_array, .preinit_array and .fini_array contain pointers to + // functions that are executed on process startup or exit. These + // pointers are set by the static linker, and they are not expected + // to change at runtime. But if you are an attacker, you could do + // interesting things by manipulating pointers in .fini_array, for + // example. So they are put into RELRO. uint32_t Type = Sec->Type; if (Type == SHT_INIT_ARRAY || Type == SHT_FINI_ARRAY || Type == SHT_PREINIT_ARRAY) return true; + + // .got contains pointers to external symbols. They are resolved by + // the dynamic linker when a module is loaded into memory, and after + // that they are not expected to change. So, it can be in RELRO. + if (In<ELFT>::Got && Sec == In<ELFT>::Got->OutSec) + return true; + + // .got.plt contains pointers to external function symbols. They are + // by default resolved lazily, so we usually cannot put it into RELRO. + // However, if "-z now" is given, the lazy symbol resolution is + // disabled, which enables us to put it into RELRO. 
if (Sec == In<ELFT>::GotPlt->OutSec) return Config->ZNow; + + // .dynamic section contains data for the dynamic linker, and + // there's no need to write to it at runtime, so it's better to put + // it into RELRO. if (Sec == In<ELFT>::Dynamic->OutSec) return true; - if (In<ELFT>::Got && Sec == In<ELFT>::Got->OutSec) - return true; - if (Sec == Out<ELFT>::BssRelRo) + + // .bss.rel.ro is used for copy relocations for read-only symbols. + // Since the dynamic linker needs to process copy relocations, the + // section cannot be read-only, but once initialized, they shouldn't + // change. + if (Sec == In<ELFT>::BssRelRo->OutSec) return true; - StringRef S = Sec->getName(); + + // Sections with some special names are put into RELRO. This is a + // bit unfortunate because section names shouldn't be significant in + // ELF in spirit. But in reality many linker features depend on + // magic section names. + StringRef S = Sec->Name; return S == ".data.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || S == ".eh_frame" || S == ".openbsd.randomdata"; } template <class ELFT> -static bool compareSectionsNonScript(const OutputSectionBase *A, - const OutputSectionBase *B) { +static bool compareSectionsNonScript(const OutputSection *A, + const OutputSection *B) { // Put .interp first because some loaders want to see that section // on the first page of the executable file when loaded into memory. - bool AIsInterp = A->getName() == ".interp"; - bool BIsInterp = B->getName() == ".interp"; + bool AIsInterp = A->Name == ".interp"; + bool BIsInterp = B->Name == ".interp"; if (AIsInterp != BIsInterp) return AIsInterp; @@ -535,8 +672,8 @@ static bool compareSectionsNonScript(const OutputSectionBase *A, // We want to put section specified by -T option first, so we // can start assigning VA starting from them later. 
- auto AAddrSetI = Config->SectionStartMap.find(A->getName()); - auto BAddrSetI = Config->SectionStartMap.find(B->getName()); + auto AAddrSetI = Config->SectionStartMap.find(A->Name); + auto BAddrSetI = Config->SectionStartMap.find(B->Name); bool AHasAddrSet = AAddrSetI != Config->SectionStartMap.end(); bool BHasAddrSet = BAddrSetI != Config->SectionStartMap.end(); if (AHasAddrSet != BHasAddrSet) @@ -601,8 +738,7 @@ static bool compareSectionsNonScript(const OutputSectionBase *A, // Some architectures have additional ordering restrictions for sections // within the same PT_LOAD. if (Config->EMachine == EM_PPC64) - return getPPC64SectionRank(A->getName()) < - getPPC64SectionRank(B->getName()); + return getPPC64SectionRank(A->Name) < getPPC64SectionRank(B->Name); if (Config->EMachine == EM_MIPS) return getMipsSectionRank(A) < getMipsSectionRank(B); @@ -611,11 +747,10 @@ static bool compareSectionsNonScript(const OutputSectionBase *A, // Output section ordering is determined by this function. template <class ELFT> -static bool compareSections(const OutputSectionBase *A, - const OutputSectionBase *B) { +static bool compareSections(const OutputSection *A, const OutputSection *B) { // For now, put sections mentioned in a linker script first. 
- int AIndex = Script<ELFT>::X->getSectionIndex(A->getName()); - int BIndex = Script<ELFT>::X->getSectionIndex(B->getName()); + int AIndex = Script->getSectionIndex(A->Name); + int BIndex = Script->getSectionIndex(B->Name); bool AInScript = AIndex != INT_MAX; bool BInScript = BIndex != INT_MAX; if (AInScript != BInScript) @@ -633,43 +768,37 @@ PhdrEntry::PhdrEntry(unsigned Type, unsigned Flags) { p_flags = Flags; } -void PhdrEntry::add(OutputSectionBase *Sec) { +void PhdrEntry::add(OutputSection *Sec) { Last = Sec; if (!First) First = Sec; - p_align = std::max(p_align, Sec->Addralign); + p_align = std::max(p_align, Sec->Alignment); if (p_type == PT_LOAD) Sec->FirstInPtLoad = First; } template <class ELFT> -static void addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec, - typename ELFT::uint Val, - uint8_t StOther = STV_HIDDEN) { - if (SymbolBody *S = Symtab<ELFT>::X->find(Name)) - if (!S->isInCurrentDSO()) - Symtab<ELFT>::X->addSynthetic(Name, Sec, Val, StOther); -} - -template <class ELFT> -static Symbol *addRegular(StringRef Name, InputSectionBase<ELFT> *Sec, - typename ELFT::uint Value) { +static Symbol *addRegular(StringRef Name, SectionBase *Sec, uint64_t Value, + uint8_t StOther = STV_HIDDEN, + uint8_t Binding = STB_WEAK) { // The linker generated symbols are added as STB_WEAK to allow user defined // ones to override them. 
- return Symtab<ELFT>::X->addRegular(Name, STV_HIDDEN, STT_NOTYPE, Value, - /*Size=*/0, STB_WEAK, Sec, + return Symtab<ELFT>::X->addRegular(Name, StOther, STT_NOTYPE, Value, + /*Size=*/0, Binding, Sec, /*File=*/nullptr); } template <class ELFT> -static Symbol *addOptionalRegular(StringRef Name, InputSectionBase<ELFT> *IS, - typename ELFT::uint Value) { +static DefinedRegular * +addOptionalRegular(StringRef Name, SectionBase *Sec, uint64_t Val, + uint8_t StOther = STV_HIDDEN, uint8_t Binding = STB_GLOBAL) { SymbolBody *S = Symtab<ELFT>::X->find(Name); if (!S) return nullptr; if (S->isInCurrentDSO()) - return S->symbol(); - return addRegular(Name, IS, Value); + return nullptr; + return cast<DefinedRegular>( + addRegular<ELFT>(Name, Sec, Val, StOther, Binding)->body()); } // The beginning and the ending of .rel[a].plt section are marked @@ -681,11 +810,11 @@ static Symbol *addOptionalRegular(StringRef Name, InputSectionBase<ELFT> *IS, template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() { if (In<ELFT>::DynSymTab) return; - StringRef S = Config->Rela ? "__rela_iplt_start" : "__rel_iplt_start"; - addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, 0); + StringRef S = Config->IsRela ? "__rela_iplt_start" : "__rel_iplt_start"; + addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, 0, STV_HIDDEN, STB_WEAK); - S = Config->Rela ? "__rela_iplt_end" : "__rel_iplt_end"; - addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, -1); + S = Config->IsRela ? "__rela_iplt_end" : "__rel_iplt_end"; + addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, -1, STV_HIDDEN, STB_WEAK); } // The linker is expected to define some symbols depending on @@ -697,15 +826,12 @@ template <class ELFT> void Writer<ELFT>::addReservedSymbols() { // to GOT. Default offset is 0x7ff0. 
// See "Global Data Symbols" in Chapter 6 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - ElfSym<ELFT>::MipsGp = - Symtab<ELFT>::X->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL); + ElfSym::MipsGp = Symtab<ELFT>::X->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL); // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between - // start of function and 'gp' pointer into GOT. To simplify relocation - // calculation we assign _gp value to it and calculate corresponding - // relocations as relative to this value. + // start of function and 'gp' pointer into GOT. if (Symtab<ELFT>::X->find("_gp_disp")) - ElfSym<ELFT>::MipsGpDisp = + ElfSym::MipsGpDisp = Symtab<ELFT>::X->addAbsolute("_gp_disp", STV_HIDDEN, STB_LOCAL); // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' @@ -713,7 +839,7 @@ template <class ELFT> void Writer<ELFT>::addReservedSymbols() { // in case of using -mno-shared option. // https://sourceware.org/ml/binutils/2004-12/msg00094.html if (Symtab<ELFT>::X->find("__gnu_local_gp")) - ElfSym<ELFT>::MipsLocalGp = + ElfSym::MipsLocalGp = Symtab<ELFT>::X->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_LOCAL); } @@ -742,45 +868,41 @@ template <class ELFT> void Writer<ELFT>::addReservedSymbols() { Symtab<ELFT>::X->addIgnored("__tls_get_addr"); // If linker script do layout we do not need to create any standart symbols. - if (ScriptConfig->HasSections) + if (Script->Opt.HasSections) return; - ElfSym<ELFT>::EhdrStart = Symtab<ELFT>::X->addIgnored("__ehdr_start"); - - auto Define = [this](StringRef S, DefinedRegular<ELFT> *&Sym1, - DefinedRegular<ELFT> *&Sym2) { - Sym1 = Symtab<ELFT>::X->addIgnored(S, STV_DEFAULT); + // __ehdr_start is the location of ELF file headers. + addOptionalRegular<ELFT>("__ehdr_start", Out::ElfHeader, 0, STV_HIDDEN); - // The name without the underscore is not a reserved name, - // so it is defined only when there is a reference against it. 
- assert(S.startswith("_")); - S = S.substr(1); - if (SymbolBody *B = Symtab<ELFT>::X->find(S)) - if (B->isUndefined()) - Sym2 = Symtab<ELFT>::X->addAbsolute(S, STV_DEFAULT); + auto Add = [](StringRef S) { + return addOptionalRegular<ELFT>(S, Out::ElfHeader, 0, STV_DEFAULT); }; - Define("_end", ElfSym<ELFT>::End, ElfSym<ELFT>::End2); - Define("_etext", ElfSym<ELFT>::Etext, ElfSym<ELFT>::Etext2); - Define("_edata", ElfSym<ELFT>::Edata, ElfSym<ELFT>::Edata2); + ElfSym::Bss = Add("__bss_start"); + ElfSym::End1 = Add("end"); + ElfSym::End2 = Add("_end"); + ElfSym::Etext1 = Add("etext"); + ElfSym::Etext2 = Add("_etext"); + ElfSym::Edata1 = Add("edata"); + ElfSym::Edata2 = Add("_edata"); } // Sort input sections by section name suffixes for // __attribute__((init_priority(N))). -template <class ELFT> static void sortInitFini(OutputSectionBase *S) { +static void sortInitFini(OutputSection *S) { if (S) - reinterpret_cast<OutputSection<ELFT> *>(S)->sortInitFini(); + reinterpret_cast<OutputSection *>(S)->sortInitFini(); } // Sort input sections by the special rule for .ctors and .dtors. -template <class ELFT> static void sortCtorsDtors(OutputSectionBase *S) { +static void sortCtorsDtors(OutputSection *S) { if (S) - reinterpret_cast<OutputSection<ELFT> *>(S)->sortCtorsDtors(); + reinterpret_cast<OutputSection *>(S)->sortCtorsDtors(); } // Sort input sections using the list provided by --symbol-ordering-file. template <class ELFT> -static void sortBySymbolsOrder(ArrayRef<OutputSectionBase *> OutputSections) { +static void sortBySymbolsOrder(ArrayRef<OutputSection *> OutputSections) { if (Config->SymbolOrderingFile.empty()) return; @@ -793,10 +915,10 @@ static void sortBySymbolsOrder(ArrayRef<OutputSectionBase *> OutputSections) { SymbolOrder.insert({S, Priority++}); // Build a map from sections to their priorities. 
- DenseMap<InputSectionBase<ELFT> *, int> SectionOrder; + DenseMap<SectionBase *, int> SectionOrder; for (elf::ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) { for (SymbolBody *Body : File->getSymbols()) { - auto *D = dyn_cast<DefinedRegular<ELFT>>(Body); + auto *D = dyn_cast<DefinedRegular>(Body); if (!D || !D->Section) continue; int &Priority = SectionOrder[D->Section]; @@ -805,15 +927,14 @@ static void sortBySymbolsOrder(ArrayRef<OutputSectionBase *> OutputSections) { } // Sort sections by priority. - for (OutputSectionBase *Base : OutputSections) - if (auto *Sec = dyn_cast<OutputSection<ELFT>>(Base)) - Sec->sort([&](InputSection<ELFT> *S) { return SectionOrder.lookup(S); }); + for (OutputSection *Base : OutputSections) + if (auto *Sec = dyn_cast<OutputSection>(Base)) + Sec->sort([&](InputSectionBase *S) { return SectionOrder.lookup(S); }); } template <class ELFT> -void Writer<ELFT>::forEachRelSec( - std::function<void(InputSectionBase<ELFT> &)> Fn) { - for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections) { +void Writer<ELFT>::forEachRelSec(std::function<void(InputSectionBase &)> Fn) { + for (InputSectionBase *IS : InputSections) { if (!IS->Live) continue; // Scan all relocations. Each relocation goes through a series @@ -823,46 +944,32 @@ void Writer<ELFT>::forEachRelSec( // processed by InputSection::relocateNonAlloc. 
if (!(IS->Flags & SHF_ALLOC)) continue; - if (isa<InputSection<ELFT>>(IS) || isa<EhInputSection<ELFT>>(IS)) + if (isa<InputSection>(IS) || isa<EhInputSection>(IS)) Fn(*IS); } -} - -template <class ELFT> -void Writer<ELFT>::addInputSec(InputSectionBase<ELFT> *IS) { - if (!IS) - return; - if (!IS->Live) { - reportDiscarded(IS); - return; + if (!Config->Relocatable) { + for (EhInputSection *ES : In<ELFT>::EhFrame->Sections) + Fn(*ES); } - OutputSectionBase *Sec; - bool IsNew; - StringRef OutsecName = getOutputSectionName(IS->Name); - std::tie(Sec, IsNew) = Factory.create(IS, OutsecName); - if (IsNew) - OutputSections.push_back(Sec); - Sec->addSection(IS); } template <class ELFT> void Writer<ELFT>::createSections() { - for (InputSectionBase<ELFT> *IS : Symtab<ELFT>::X->Sections) - addInputSec(IS); + for (InputSectionBase *IS : InputSections) + if (IS) + Factory.addInputSec(IS, getOutputSectionName(IS->Name)); sortBySymbolsOrder<ELFT>(OutputSections); - sortInitFini<ELFT>(findSection(".init_array")); - sortInitFini<ELFT>(findSection(".fini_array")); - sortCtorsDtors<ELFT>(findSection(".ctors")); - sortCtorsDtors<ELFT>(findSection(".dtors")); + sortInitFini(findSection(".init_array")); + sortInitFini(findSection(".fini_array")); + sortCtorsDtors(findSection(".ctors")); + sortCtorsDtors(findSection(".dtors")); - for (OutputSectionBase *Sec : OutputSections) + for (OutputSection *Sec : OutputSections) Sec->assignOffsets(); } -template <class ELFT> -static bool canSharePtLoad(const OutputSectionBase &S1, - const OutputSectionBase &S2) { +static bool canSharePtLoad(const OutputSection &S1, const OutputSection &S2) { if (!(S1.Flags & SHF_ALLOC) || !(S2.Flags & SHF_ALLOC)) return false; @@ -881,12 +988,12 @@ template <class ELFT> void Writer<ELFT>::sortSections() { // relative order for SHF_LINK_ORDER sections. 
if (Config->Relocatable) return; - if (!ScriptConfig->HasSections) { + if (!Script->Opt.HasSections) { std::stable_sort(OutputSections.begin(), OutputSections.end(), compareSectionsNonScript<ELFT>); return; } - Script<ELFT>::X->adjustSectionsBeforeSorting(); + Script->adjustSectionsBeforeSorting(); // The order of the sections in the script is arbitrary and may not agree with // compareSectionsNonScript. This means that we cannot easily define a @@ -918,14 +1025,14 @@ template <class ELFT> void Writer<ELFT>::sortSections() { auto I = OutputSections.begin(); auto E = OutputSections.end(); auto NonScriptI = - std::find_if(OutputSections.begin(), E, [](OutputSectionBase *S) { - return Script<ELFT>::X->getSectionIndex(S->getName()) == INT_MAX; + std::find_if(OutputSections.begin(), E, [](OutputSection *S) { + return Script->getSectionIndex(S->Name) == INT_MAX; }); while (NonScriptI != E) { auto BestPos = std::max_element( - I, NonScriptI, [&](OutputSectionBase *&A, OutputSectionBase *&B) { - bool ACanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *A); - bool BCanSharePtLoad = canSharePtLoad<ELFT>(**NonScriptI, *B); + I, NonScriptI, [&](OutputSection *&A, OutputSection *&B) { + bool ACanSharePtLoad = canSharePtLoad(**NonScriptI, *A); + bool BCanSharePtLoad = canSharePtLoad(**NonScriptI, *B); if (ACanSharePtLoad != BCanSharePtLoad) return BCanSharePtLoad; @@ -949,16 +1056,14 @@ template <class ELFT> void Writer<ELFT>::sortSections() { ++NonScriptI; } - Script<ELFT>::X->adjustSectionsAfterSorting(); + Script->adjustSectionsAfterSorting(); } -template <class ELFT> -static void -finalizeSynthetic(const std::vector<SyntheticSection<ELFT> *> &Sections) { - for (SyntheticSection<ELFT> *SS : Sections) +static void applySynthetic(const std::vector<SyntheticSection *> &Sections, + std::function<void(SyntheticSection *)> Fn) { + for (SyntheticSection *SS : Sections) if (SS && SS->OutSec && !SS->empty()) { - SS->finalize(); - SS->OutSec->Size = 0; + Fn(SS); 
SS->OutSec->assignOffsets(); } } @@ -966,40 +1071,40 @@ finalizeSynthetic(const std::vector<SyntheticSection<ELFT> *> &Sections) { // We need to add input synthetic sections early in createSyntheticSections() // to make them visible from linkescript side. But not all sections are always // required to be in output. For example we don't need dynamic section content -// sometimes. This function filters out such unused sections from output. -template <class ELFT> -static void removeUnusedSyntheticSections(std::vector<OutputSectionBase *> &V) { - // Input synthetic sections are placed after all regular ones. We iterate over - // them all and exit at first non-synthetic. - for (InputSectionBase<ELFT> *S : llvm::reverse(Symtab<ELFT>::X->Sections)) { - SyntheticSection<ELFT> *SS = dyn_cast<SyntheticSection<ELFT>>(S); +// sometimes. This function filters out such unused sections from the output. +static void removeUnusedSyntheticSections(std::vector<OutputSection *> &V) { + // All input synthetic sections that can be empty are placed after + // all regular ones. We iterate over them all and exit at first + // non-synthetic. + for (InputSectionBase *S : llvm::reverse(InputSections)) { + SyntheticSection *SS = dyn_cast<SyntheticSection>(S); if (!SS) return; if (!SS->empty() || !SS->OutSec) continue; - OutputSection<ELFT> *OutSec = cast<OutputSection<ELFT>>(SS->OutSec); - OutSec->Sections.erase( - std::find(OutSec->Sections.begin(), OutSec->Sections.end(), SS)); - // If there is no other sections in output section, remove it from output. - if (OutSec->Sections.empty()) - V.erase(std::find(V.begin(), V.end(), OutSec)); + SS->OutSec->Sections.erase(std::find(SS->OutSec->Sections.begin(), + SS->OutSec->Sections.end(), SS)); + // If there are no other sections in the output section, remove it from the + // output. + if (SS->OutSec->Sections.empty()) + V.erase(std::find(V.begin(), V.end(), SS->OutSec)); } } // Create output section objects and add them to OutputSections. 
template <class ELFT> void Writer<ELFT>::finalizeSections() { - Out<ELFT>::DebugInfo = findSection(".debug_info"); - Out<ELFT>::PreinitArray = findSection(".preinit_array"); - Out<ELFT>::InitArray = findSection(".init_array"); - Out<ELFT>::FiniArray = findSection(".fini_array"); + Out::DebugInfo = findSection(".debug_info"); + Out::PreinitArray = findSection(".preinit_array"); + Out::InitArray = findSection(".init_array"); + Out::FiniArray = findSection(".fini_array"); // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop // symbols for sections, so that the runtime can get the start and end // addresses of each section by section name. Add such symbols. if (!Config->Relocatable) { addStartEndSymbols(); - for (OutputSectionBase *Sec : OutputSections) + for (OutputSection *Sec : OutputSections) addStartStopSymbols(Sec); } @@ -1008,34 +1113,40 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // Even the author of gold doesn't remember why gold behaves that way. // https://sourceware.org/ml/binutils/2002-03/msg00360.html if (In<ELFT>::DynSymTab) - addRegular("_DYNAMIC", In<ELFT>::Dynamic, 0); + addRegular<ELFT>("_DYNAMIC", In<ELFT>::Dynamic, 0); // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); - if (!Out<ELFT>::EhFrame->empty()) { - OutputSections.push_back(Out<ELFT>::EhFrame); - Out<ELFT>::EhFrame->finalize(); - } + // This responsible for splitting up .eh_frame section into + // pieces. The relocation scan uses those pieces, so this has to be + // earlier. + applySynthetic({In<ELFT>::EhFrame}, + [](SyntheticSection *SS) { SS->finalizeContents(); }); // Scan relocations. This must be done after every symbol is declared so that // we can correctly decide if a dynamic relocation is needed. 
forEachRelSec(scanRelocations<ELFT>); - // Now that we have defined all possible symbols including linker- + if (In<ELFT>::Plt && !In<ELFT>::Plt->empty()) + In<ELFT>::Plt->addSymbols(); + if (In<ELFT>::Iplt && !In<ELFT>::Iplt->empty()) + In<ELFT>::Iplt->addSymbols(); + + // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. for (Symbol *S : Symtab<ELFT>::X->getSymbols()) { SymbolBody *Body = S->body(); - if (!includeInSymtab<ELFT>(*Body)) + if (!includeInSymtab(*Body)) continue; if (In<ELFT>::SymTab) - In<ELFT>::SymTab->addGlobal(Body); + In<ELFT>::SymTab->addSymbol(Body); if (In<ELFT>::DynSymTab && S->includeInDynsym()) { - In<ELFT>::DynSymTab->addGlobal(Body); - if (auto *SS = dyn_cast<SharedSymbol<ELFT>>(Body)) - if (SS->file()->isNeeded()) + In<ELFT>::DynSymTab->addSymbol(Body); + if (auto *SS = dyn_cast<SharedSymbol>(Body)) + if (cast<SharedFile<ELFT>>(SS->File)->isNeeded()) In<ELFT>::VerNeed->addSymbol(SS); } } @@ -1045,78 +1156,101 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { return; // So far we have added sections from input object files. - // This function adds linker-created Out<ELFT>::* sections. + // This function adds linker-created Out::* sections. addPredefinedSections(); - removeUnusedSyntheticSections<ELFT>(OutputSections); + removeUnusedSyntheticSections(OutputSections); sortSections(); + // This is a bit of a hack. A value of 0 means undef, so we set it + // to 1 t make __ehdr_start defined. The section number is not + // particularly relevant. + Out::ElfHeader->SectionIndex = 1; + unsigned I = 1; - for (OutputSectionBase *Sec : OutputSections) { + for (OutputSection *Sec : OutputSections) { Sec->SectionIndex = I++; - Sec->ShName = In<ELFT>::ShStrTab->addString(Sec->getName()); + Sec->ShName = In<ELFT>::ShStrTab->addString(Sec->Name); } // Binary and relocatable output does not have PHDRS. 
// The headers have to be created before finalize as that can influence the // image base and the dynamic section on mips includes the image base. if (!Config->Relocatable && !Config->OFormatBinary) { - Phdrs = Script<ELFT>::X->hasPhdrsCommands() ? Script<ELFT>::X->createPhdrs() - : createPhdrs(); + Phdrs = Script->hasPhdrsCommands() ? Script->createPhdrs() : createPhdrs(); addPtArmExid(Phdrs); fixHeaders(); } + // Dynamic section must be the last one in this list and dynamic + // symbol table section (DynSymTab) must be the first one. + applySynthetic({In<ELFT>::DynSymTab, In<ELFT>::Bss, In<ELFT>::BssRelRo, + In<ELFT>::GnuHashTab, In<ELFT>::HashTab, In<ELFT>::SymTab, + In<ELFT>::ShStrTab, In<ELFT>::StrTab, In<ELFT>::VerDef, + In<ELFT>::DynStrTab, In<ELFT>::GdbIndex, In<ELFT>::Got, + In<ELFT>::MipsGot, In<ELFT>::IgotPlt, In<ELFT>::GotPlt, + In<ELFT>::RelaDyn, In<ELFT>::RelaIplt, In<ELFT>::RelaPlt, + In<ELFT>::Plt, In<ELFT>::Iplt, In<ELFT>::Plt, + In<ELFT>::EhFrameHdr, In<ELFT>::VerSym, In<ELFT>::VerNeed, + In<ELFT>::Dynamic}, + [](SyntheticSection *SS) { SS->finalizeContents(); }); + + // Some architectures use small displacements for jump instructions. + // It is linker's responsibility to create thunks containing long + // jump instructions if jump targets are too far. Create thunks. + if (Target->NeedsThunks) { + // FIXME: only ARM Interworking and Mips LA25 Thunks are implemented, + // these + // do not require address information. To support range extension Thunks + // we need to assign addresses so that we can tell if jump instructions + // are out of range. This will need to turn into a loop that converges + // when no more Thunks are added + ThunkCreator<ELFT> TC; + if (TC.createThunks(OutputSections)) + applySynthetic({In<ELFT>::MipsGot}, + [](SyntheticSection *SS) { SS->updateAllocSize(); }); + } // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result // of finalizing other sections. 
- for (OutputSectionBase *Sec : OutputSections) - Sec->finalize(); + for (OutputSection *Sec : OutputSections) + Sec->finalize<ELFT>(); - // Dynamic section must be the last one in this list and dynamic - // symbol table section (DynSymTab) must be the first one. - finalizeSynthetic<ELFT>( - {In<ELFT>::DynSymTab, In<ELFT>::GnuHashTab, In<ELFT>::HashTab, - In<ELFT>::SymTab, In<ELFT>::ShStrTab, In<ELFT>::StrTab, - In<ELFT>::VerDef, In<ELFT>::DynStrTab, In<ELFT>::GdbIndex, - In<ELFT>::Got, In<ELFT>::MipsGot, In<ELFT>::IgotPlt, - In<ELFT>::GotPlt, In<ELFT>::RelaDyn, In<ELFT>::RelaIplt, - In<ELFT>::RelaPlt, In<ELFT>::Plt, In<ELFT>::Iplt, - In<ELFT>::Plt, In<ELFT>::EhFrameHdr, In<ELFT>::VerSym, - In<ELFT>::VerNeed, In<ELFT>::Dynamic}); + // createThunks may have added local symbols to the static symbol table + applySynthetic({In<ELFT>::SymTab, In<ELFT>::ShStrTab, In<ELFT>::StrTab}, + [](SyntheticSection *SS) { SS->postThunkContents(); }); } template <class ELFT> void Writer<ELFT>::addPredefinedSections() { - if (Out<ELFT>::Bss->Size > 0) - OutputSections.push_back(Out<ELFT>::Bss); - if (Out<ELFT>::BssRelRo->Size > 0) - OutputSections.push_back(Out<ELFT>::BssRelRo); - - auto OS = dyn_cast_or_null<OutputSection<ELFT>>(findSection(".ARM.exidx")); + // ARM ABI requires .ARM.exidx to be terminated by some piece of data. + // We have the terminater synthetic section class. Add that at the end. + auto *OS = dyn_cast_or_null<OutputSection>(findSection(".ARM.exidx")); if (OS && !OS->Sections.empty() && !Config->Relocatable) - OS->addSection(make<ARMExidxSentinelSection<ELFT>>()); - - addInputSec(In<ELFT>::SymTab); - addInputSec(In<ELFT>::ShStrTab); - addInputSec(In<ELFT>::StrTab); + OS->addSection(make<ARMExidxSentinelSection>()); } // The linker is expected to define SECNAME_start and SECNAME_end // symbols for a few sections. This function defines them. 
template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { - auto Define = [&](StringRef Start, StringRef End, OutputSectionBase *OS) { + auto Define = [&](StringRef Start, StringRef End, OutputSection *OS) { // These symbols resolve to the image base if the section does not exist. // A special value -1 indicates end of the section. - addOptionalSynthetic<ELFT>(Start, OS, 0); - addOptionalSynthetic<ELFT>(End, OS, OS ? -1 : 0); + if (OS) { + addOptionalRegular<ELFT>(Start, OS, 0); + addOptionalRegular<ELFT>(End, OS, -1); + } else { + if (Config->Pic) + OS = Out::ElfHeader; + addOptionalRegular<ELFT>(Start, OS, 0); + addOptionalRegular<ELFT>(End, OS, 0); + } }; - Define("__preinit_array_start", "__preinit_array_end", - Out<ELFT>::PreinitArray); - Define("__init_array_start", "__init_array_end", Out<ELFT>::InitArray); - Define("__fini_array_start", "__fini_array_end", Out<ELFT>::FiniArray); + Define("__preinit_array_start", "__preinit_array_end", Out::PreinitArray); + Define("__init_array_start", "__init_array_end", Out::InitArray); + Define("__fini_array_start", "__fini_array_end", Out::FiniArray); - if (OutputSectionBase *Sec = findSection(".ARM.exidx")) + if (OutputSection *Sec = findSection(".ARM.exidx")) Define("__exidx_start", "__exidx_end", Sec); } @@ -1126,23 +1260,22 @@ template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { // respectively. This is not requested by the ELF standard, but GNU ld and // gold provide the feature, and used by many programs. 
template <class ELFT> -void Writer<ELFT>::addStartStopSymbols(OutputSectionBase *Sec) { - StringRef S = Sec->getName(); +void Writer<ELFT>::addStartStopSymbols(OutputSection *Sec) { + StringRef S = Sec->Name; if (!isValidCIdentifier(S)) return; - addOptionalSynthetic<ELFT>(Saver.save("__start_" + S), Sec, 0, STV_DEFAULT); - addOptionalSynthetic<ELFT>(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT); + addOptionalRegular<ELFT>(Saver.save("__start_" + S), Sec, 0, STV_DEFAULT); + addOptionalRegular<ELFT>(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT); } -template <class ELFT> -OutputSectionBase *Writer<ELFT>::findSection(StringRef Name) { - for (OutputSectionBase *Sec : OutputSections) - if (Sec->getName() == Name) +template <class ELFT> OutputSection *Writer<ELFT>::findSection(StringRef Name) { + for (OutputSection *Sec : OutputSections) + if (Sec->Name == Name) return Sec; return nullptr; } -template <class ELFT> static bool needsPtLoad(OutputSectionBase *Sec) { +static bool needsPtLoad(OutputSection *Sec) { if (!(Sec->Flags & SHF_ALLOC)) return false; @@ -1158,13 +1291,12 @@ template <class ELFT> static bool needsPtLoad(OutputSectionBase *Sec) { // linker scripts are designed for creating two PT_LOADs only, one RX and one // RW. This means that there is no alignment in the RO to RX transition and we // cannot create a PT_LOAD there. -template <class ELFT> -static typename ELFT::uint computeFlags(typename ELFT::uint F) { - if (Config->OMagic) +static uint64_t computeFlags(uint64_t Flags) { + if (Config->Omagic) return PF_R | PF_W | PF_X; - if (Config->SingleRoRx && !(F & PF_W)) - return F | PF_X; - return F; + if (Config->SingleRoRx && !(Flags & PF_W)) + return Flags | PF_X; + return Flags; } // Decide which program headers to create and which sections to include in each @@ -1177,33 +1309,19 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { }; // The first phdr entry is PT_PHDR which describes the program header itself. 
- PhdrEntry &Hdr = *AddHdr(PT_PHDR, PF_R); - Hdr.add(Out<ELFT>::ProgramHeaders); + AddHdr(PT_PHDR, PF_R)->add(Out::ProgramHeaders); // PT_INTERP must be the second entry if exists. - if (OutputSectionBase *Sec = findSection(".interp")) { - PhdrEntry &Hdr = *AddHdr(PT_INTERP, Sec->getPhdrFlags()); - Hdr.add(Sec); - } + if (OutputSection *Sec = findSection(".interp")) + AddHdr(PT_INTERP, Sec->getPhdrFlags())->add(Sec); // Add the first PT_LOAD segment for regular output sections. - uintX_t Flags = computeFlags<ELFT>(PF_R); + uint64_t Flags = computeFlags(PF_R); PhdrEntry *Load = AddHdr(PT_LOAD, Flags); - - PhdrEntry TlsHdr(PT_TLS, PF_R); - PhdrEntry RelRo(PT_GNU_RELRO, PF_R); - PhdrEntry Note(PT_NOTE, PF_R); - for (OutputSectionBase *Sec : OutputSections) { + for (OutputSection *Sec : OutputSections) { if (!(Sec->Flags & SHF_ALLOC)) break; - - // If we meet TLS section then we create TLS header - // and put all TLS sections inside for further use when - // assign addresses. - if (Sec->Flags & SHF_TLS) - TlsHdr.add(Sec); - - if (!needsPtLoad<ELFT>(Sec)) + if (!needsPtLoad(Sec)) continue; // Segments are contiguous memory regions that has the same attributes @@ -1211,58 +1329,58 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { // Therefore, we need to create a new phdr when the next section has // different flags or is loaded at a discontiguous address using AT linker // script command. - uintX_t NewFlags = computeFlags<ELFT>(Sec->getPhdrFlags()); - if (Script<ELFT>::X->hasLMA(Sec->getName()) || Flags != NewFlags) { + uint64_t NewFlags = computeFlags(Sec->getPhdrFlags()); + if (Script->hasLMA(Sec->Name) || Flags != NewFlags) { Load = AddHdr(PT_LOAD, NewFlags); Flags = NewFlags; } Load->add(Sec); - - if (isRelroSection<ELFT>(Sec)) - RelRo.add(Sec); - if (Sec->Type == SHT_NOTE) - Note.add(Sec); } - // Add the TLS segment unless it's empty. + // Add a TLS segment if any. 
+ PhdrEntry TlsHdr(PT_TLS, PF_R); + for (OutputSection *Sec : OutputSections) + if (Sec->Flags & SHF_TLS) + TlsHdr.add(Sec); if (TlsHdr.First) Ret.push_back(std::move(TlsHdr)); // Add an entry for .dynamic. - if (In<ELFT>::DynSymTab) { - PhdrEntry &H = - *AddHdr(PT_DYNAMIC, In<ELFT>::Dynamic->OutSec->getPhdrFlags()); - H.add(In<ELFT>::Dynamic->OutSec); - } + if (In<ELFT>::DynSymTab) + AddHdr(PT_DYNAMIC, In<ELFT>::Dynamic->OutSec->getPhdrFlags()) + ->add(In<ELFT>::Dynamic->OutSec); // PT_GNU_RELRO includes all sections that should be marked as // read-only by dynamic linker after proccessing relocations. + PhdrEntry RelRo(PT_GNU_RELRO, PF_R); + for (OutputSection *Sec : OutputSections) + if (needsPtLoad(Sec) && isRelroSection<ELFT>(Sec)) + RelRo.add(Sec); if (RelRo.First) Ret.push_back(std::move(RelRo)); // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr. - if (!Out<ELFT>::EhFrame->empty() && In<ELFT>::EhFrameHdr) { - PhdrEntry &Hdr = - *AddHdr(PT_GNU_EH_FRAME, In<ELFT>::EhFrameHdr->OutSec->getPhdrFlags()); - Hdr.add(In<ELFT>::EhFrameHdr->OutSec); - } + if (!In<ELFT>::EhFrame->empty() && In<ELFT>::EhFrameHdr && + In<ELFT>::EhFrame->OutSec && In<ELFT>::EhFrameHdr->OutSec) + AddHdr(PT_GNU_EH_FRAME, In<ELFT>::EhFrameHdr->OutSec->getPhdrFlags()) + ->add(In<ELFT>::EhFrameHdr->OutSec); - // PT_OPENBSD_RANDOMIZE specifies the location and size of a part of the - // memory image of the program that must be filled with random data before any - // code in the object is executed. - if (OutputSectionBase *Sec = findSection(".openbsd.randomdata")) { - PhdrEntry &Hdr = *AddHdr(PT_OPENBSD_RANDOMIZE, Sec->getPhdrFlags()); - Hdr.add(Sec); - } + // PT_OPENBSD_RANDOMIZE is an OpenBSD-specific feature. That makes + // the dynamic linker fill the segment with random data. 
+ if (OutputSection *Sec = findSection(".openbsd.randomdata")) + AddHdr(PT_OPENBSD_RANDOMIZE, Sec->getPhdrFlags())->add(Sec); // PT_GNU_STACK is a special section to tell the loader to make the - // pages for the stack non-executable. - if (!Config->ZExecstack) { - PhdrEntry &Hdr = *AddHdr(PT_GNU_STACK, PF_R | PF_W); - if (Config->ZStackSize != uint64_t(-1)) - Hdr.p_memsz = Config->ZStackSize; - } + // pages for the stack non-executable. If you really want an executable + // stack, you can pass -z execstack, but that's not recommended for + // security reasons. + unsigned Perm; + if (Config->ZExecstack) + Perm = PF_R | PF_W | PF_X; + else + Perm = PF_R | PF_W; + AddHdr(PT_GNU_STACK, Perm)->p_memsz = Config->ZStackSize; // PT_OPENBSD_WXNEEDED is a OpenBSD-specific header to mark the executable // is expected to perform W^X violations, such as calling mprotect(2) or @@ -1271,8 +1389,17 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { if (Config->ZWxneeded) AddHdr(PT_OPENBSD_WXNEEDED, PF_X); - if (Note.First) - Ret.push_back(std::move(Note)); + // Create one PT_NOTE per a group of contiguous .note sections. 
+ PhdrEntry *Note = nullptr; + for (OutputSection *Sec : OutputSections) { + if (Sec->Type == SHT_NOTE) { + if (!Note || Script->hasLMA(Sec->Name)) + Note = AddHdr(PT_NOTE, PF_R); + Note->add(Sec); + } else { + Note = nullptr; + } + } return Ret; } @@ -1282,7 +1409,7 @@ void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry> &Phdrs) { return; auto I = std::find_if( OutputSections.begin(), OutputSections.end(), - [](OutputSectionBase *Sec) { return Sec->Type == SHT_ARM_EXIDX; }); + [](OutputSection *Sec) { return Sec->Type == SHT_ARM_EXIDX; }); if (I == OutputSections.end()) return; @@ -1311,83 +1438,94 @@ template <class ELFT> void Writer<ELFT>::fixSectionAlignments() { auto I = std::find(OutputSections.begin(), End, P.Last); if (I == End || (I + 1) == End) continue; - OutputSectionBase *Sec = *(I + 1); - if (needsPtLoad<ELFT>(Sec)) + OutputSection *Sec = *(I + 1); + if (needsPtLoad(Sec)) Sec->PageAlign = true; } } -template <class ELFT> -void elf::allocateHeaders(MutableArrayRef<PhdrEntry> Phdrs, - ArrayRef<OutputSectionBase *> OutputSections) { +bool elf::allocateHeaders(std::vector<PhdrEntry> &Phdrs, + ArrayRef<OutputSection *> OutputSections, + uint64_t Min) { auto FirstPTLoad = std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry &E) { return E.p_type == PT_LOAD; }); if (FirstPTLoad == Phdrs.end()) - return; + return false; + + uint64_t HeaderSize = getHeaderSize(); + if (HeaderSize > Min) { + auto PhdrI = + std::find_if(Phdrs.begin(), Phdrs.end(), + [](const PhdrEntry &E) { return E.p_type == PT_PHDR; }); + if (PhdrI != Phdrs.end()) + Phdrs.erase(PhdrI); + return false; + } + Min = alignDown(Min - HeaderSize, Config->MaxPageSize); + + if (!Script->Opt.HasSections) + Config->ImageBase = Min = std::min(Min, Config->ImageBase); + + Out::ElfHeader->Addr = Min; + Out::ProgramHeaders->Addr = Min + Out::ElfHeader->Size; + + if (Script->hasPhdrsCommands()) + return true; + if (FirstPTLoad->First) - for (OutputSectionBase *Sec : OutputSections) + for 
(OutputSection *Sec : OutputSections) if (Sec->FirstInPtLoad == FirstPTLoad->First) - Sec->FirstInPtLoad = Out<ELFT>::ElfHeader; - FirstPTLoad->First = Out<ELFT>::ElfHeader; + Sec->FirstInPtLoad = Out::ElfHeader; + FirstPTLoad->First = Out::ElfHeader; if (!FirstPTLoad->Last) - FirstPTLoad->Last = Out<ELFT>::ProgramHeaders; + FirstPTLoad->Last = Out::ProgramHeaders; + return true; } // We should set file offsets and VAs for elf header and program headers // sections. These are special, we do not include them into output sections // list, but have them to simplify the code. template <class ELFT> void Writer<ELFT>::fixHeaders() { - Out<ELFT>::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size(); + Out::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size(); // If the script has SECTIONS, assignAddresses will compute the values. - if (ScriptConfig->HasSections) + if (Script->Opt.HasSections) return; - uintX_t HeaderSize = getHeaderSize<ELFT>(); // When -T<section> option is specified, lower the base to make room for those // sections. - if (!Config->SectionStartMap.empty()) { - uint64_t Min = -1; + uint64_t Min = -1; + if (!Config->SectionStartMap.empty()) for (const auto &P : Config->SectionStartMap) Min = std::min(Min, P.second); - if (HeaderSize < Min) - Min -= HeaderSize; - else - AllocateHeader = false; - if (Min < Config->ImageBase) - Config->ImageBase = alignDown(Min, Config->MaxPageSize); - } - if (AllocateHeader) - allocateHeaders<ELFT>(Phdrs, OutputSections); - - uintX_t BaseVA = Config->ImageBase; - Out<ELFT>::ElfHeader->Addr = BaseVA; - Out<ELFT>::ProgramHeaders->Addr = BaseVA + Out<ELFT>::ElfHeader->Size; + AllocateHeader = allocateHeaders(Phdrs, OutputSections, Min); } // Assign VAs (addresses at run-time) to output sections. 
template <class ELFT> void Writer<ELFT>::assignAddresses() { - uintX_t VA = Config->ImageBase; + uint64_t VA = Config->ImageBase; + uint64_t ThreadBssOffset = 0; + if (AllocateHeader) - VA += getHeaderSize<ELFT>(); - uintX_t ThreadBssOffset = 0; - for (OutputSectionBase *Sec : OutputSections) { - uintX_t Alignment = Sec->Addralign; + VA += getHeaderSize(); + + for (OutputSection *Sec : OutputSections) { + uint32_t Alignment = Sec->Alignment; if (Sec->PageAlign) - Alignment = std::max<uintX_t>(Alignment, Config->MaxPageSize); + Alignment = std::max<uint32_t>(Alignment, Config->MaxPageSize); - auto I = Config->SectionStartMap.find(Sec->getName()); + auto I = Config->SectionStartMap.find(Sec->Name); if (I != Config->SectionStartMap.end()) VA = I->second; // We only assign VAs to allocated sections. - if (needsPtLoad<ELFT>(Sec)) { + if (needsPtLoad(Sec)) { VA = alignTo(VA, Alignment); Sec->Addr = VA; VA += Sec->Size; } else if (Sec->Flags & SHF_TLS && Sec->Type == SHT_NOBITS) { - uintX_t TVA = VA + ThreadBssOffset; + uint64_t TVA = VA + ThreadBssOffset; TVA = alignTo(TVA, Alignment); Sec->Addr = TVA; ThreadBssOffset = TVA - VA + Sec->Size; @@ -1399,12 +1537,11 @@ template <class ELFT> void Writer<ELFT>::assignAddresses() { // its new file offset. The file offset must be the same with its // virtual address (modulo the page size) so that the loader can load // executables without any address adjustment. -template <class ELFT, class uintX_t> -static uintX_t getFileAlignment(uintX_t Off, OutputSectionBase *Sec) { - OutputSectionBase *First = Sec->FirstInPtLoad; +static uint64_t getFileAlignment(uint64_t Off, OutputSection *Sec) { + OutputSection *First = Sec->FirstInPtLoad; // If the section is not in a PT_LOAD, we just have to align it. if (!First) - return alignTo(Off, Sec->Addralign); + return alignTo(Off, Sec->Alignment); // The first section in a PT_LOAD has to have congruent offset and address // module the page size. 
@@ -1416,36 +1553,35 @@ static uintX_t getFileAlignment(uintX_t Off, OutputSectionBase *Sec) { return First->Offset + Sec->Addr - First->Addr; } -template <class ELFT, class uintX_t> -void setOffset(OutputSectionBase *Sec, uintX_t &Off) { +static uint64_t setOffset(OutputSection *Sec, uint64_t Off) { if (Sec->Type == SHT_NOBITS) { Sec->Offset = Off; - return; + return Off; } - Off = getFileAlignment<ELFT>(Off, Sec); + Off = getFileAlignment(Off, Sec); Sec->Offset = Off; - Off += Sec->Size; + return Off + Sec->Size; } template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() { - uintX_t Off = 0; - for (OutputSectionBase *Sec : OutputSections) + uint64_t Off = 0; + for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_ALLOC) - setOffset<ELFT>(Sec, Off); - FileSize = alignTo(Off, sizeof(uintX_t)); + Off = setOffset(Sec, Off); + FileSize = alignTo(Off, Config->Wordsize); } // Assign file offsets to output sections. template <class ELFT> void Writer<ELFT>::assignFileOffsets() { - uintX_t Off = 0; - setOffset<ELFT>(Out<ELFT>::ElfHeader, Off); - setOffset<ELFT>(Out<ELFT>::ProgramHeaders, Off); + uint64_t Off = 0; + Off = setOffset(Out::ElfHeader, Off); + Off = setOffset(Out::ProgramHeaders, Off); - for (OutputSectionBase *Sec : OutputSections) - setOffset<ELFT>(Sec, Off); + for (OutputSection *Sec : OutputSections) + Off = setOffset(Sec, Off); - SectionHeaderOff = alignTo(Off, sizeof(uintX_t)); + SectionHeaderOff = alignTo(Off, Config->Wordsize); FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr); } @@ -1453,8 +1589,8 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() { // file offsets and VAs to all sections. 
template <class ELFT> void Writer<ELFT>::setPhdrs() { for (PhdrEntry &P : Phdrs) { - OutputSectionBase *First = P.First; - OutputSectionBase *Last = P.Last; + OutputSection *First = P.First; + OutputSection *Last = P.Last; if (First) { P.p_filesz = Last->Offset - First->Offset; if (Last->Type != SHT_NOBITS) @@ -1478,7 +1614,7 @@ template <class ELFT> void Writer<ELFT>::setPhdrs() { // The TLS pointer goes after PT_TLS. At least glibc will align it, // so round up the size to make sure the offsets are correct. if (P.p_type == PT_TLS) { - Out<ELFT>::TlsPhdr = &P; + Out::TlsPhdr = &P; if (P.p_memsz) P.p_memsz = alignTo(P.p_memsz, P.p_align); } @@ -1492,17 +1628,17 @@ template <class ELFT> void Writer<ELFT>::setPhdrs() { // 3. the value of the symbol start, if present; // 4. the address of the first byte of the .text section, if present; // 5. the address 0. -template <class ELFT> typename ELFT::uint Writer<ELFT>::getEntryAddr() { +template <class ELFT> uint64_t Writer<ELFT>::getEntryAddr() { // Case 1, 2 or 3. As a special case, if the symbol is actually // a number, we'll use that number as an address. 
if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Entry)) - return B->getVA<ELFT>(); + return B->getVA(); uint64_t Addr; if (!Config->Entry.getAsInteger(0, Addr)) return Addr; // Case 4 - if (OutputSectionBase *Sec = findSection(".text")) { + if (OutputSection *Sec = findSection(".text")) { if (Config->WarnMissingEntry) warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" + utohexstr(Sec->Addr)); @@ -1516,12 +1652,6 @@ template <class ELFT> typename ELFT::uint Writer<ELFT>::getEntryAddr() { return 0; } -template <class ELFT> static uint8_t getELFEncoding() { - if (ELFT::TargetEndianness == llvm::support::little) - return ELFDATA2LSB; - return ELFDATA2MSB; -} - static uint16_t getELFType() { if (Config->Pic) return ET_DYN; @@ -1531,52 +1661,59 @@ static uint16_t getELFType() { } // This function is called after we have assigned address and size -// to each section. This function fixes some predefined absolute +// to each section. This function fixes some predefined // symbol values that depend on section address and size. -template <class ELFT> void Writer<ELFT>::fixAbsoluteSymbols() { - // __ehdr_start is the location of program headers. - if (ElfSym<ELFT>::EhdrStart) - ElfSym<ELFT>::EhdrStart->Value = Out<ELFT>::ProgramHeaders->Addr; - - auto Set = [](DefinedRegular<ELFT> *S1, DefinedRegular<ELFT> *S2, uintX_t V) { - if (S1) - S1->Value = V; - if (S2) - S2->Value = V; +template <class ELFT> void Writer<ELFT>::fixPredefinedSymbols() { + auto Set = [](DefinedRegular *S1, DefinedRegular *S2, OutputSection *Sec, + uint64_t Value) { + if (S1) { + S1->Section = Sec; + S1->Value = Value; + } + if (S2) { + S2->Section = Sec; + S2->Value = Value; + } }; // _etext is the first location after the last read-only loadable segment. // _edata is the first location after the last read-write loadable segment. // _end is the first location after the uninitialized data region. 
+ PhdrEntry *Last = nullptr; + PhdrEntry *LastRO = nullptr; + PhdrEntry *LastRW = nullptr; for (PhdrEntry &P : Phdrs) { if (P.p_type != PT_LOAD) continue; - Set(ElfSym<ELFT>::End, ElfSym<ELFT>::End2, P.p_vaddr + P.p_memsz); - - uintX_t Val = P.p_vaddr + P.p_filesz; + Last = &P; if (P.p_flags & PF_W) - Set(ElfSym<ELFT>::Edata, ElfSym<ELFT>::Edata2, Val); + LastRW = &P; else - Set(ElfSym<ELFT>::Etext, ElfSym<ELFT>::Etext2, Val); + LastRO = &P; } + if (Last) + Set(ElfSym::End1, ElfSym::End2, Last->First, Last->p_memsz); + if (LastRO) + Set(ElfSym::Etext1, ElfSym::Etext2, LastRO->First, LastRO->p_filesz); + if (LastRW) + Set(ElfSym::Edata1, ElfSym::Edata2, LastRW->First, LastRW->p_filesz); + + if (ElfSym::Bss) + ElfSym::Bss->Section = findSection(".bss"); // Setup MIPS _gp_disp/__gnu_local_gp symbols which should // be equal to the _gp symbol's value. if (Config->EMachine == EM_MIPS) { - if (!ElfSym<ELFT>::MipsGp->Value) { + if (!ElfSym::MipsGp->Value) { // Find GP-relative section with the lowest address // and use this address to calculate default _gp value. - uintX_t Gp = -1; - for (const OutputSectionBase * OS : OutputSections) + uint64_t Gp = -1; + for (const OutputSection *OS : OutputSections) if ((OS->Flags & SHF_MIPS_GPREL) && OS->Addr < Gp) Gp = OS->Addr; - if (Gp != (uintX_t)-1) - ElfSym<ELFT>::MipsGp->Value = Gp + 0x7ff0; + if (Gp != (uint64_t)-1) + ElfSym::MipsGp->Value = Gp + 0x7ff0; } - if (ElfSym<ELFT>::MipsGpDisp) - ElfSym<ELFT>::MipsGpDisp->Value = ElfSym<ELFT>::MipsGp->Value; - if (ElfSym<ELFT>::MipsLocalGp) - ElfSym<ELFT>::MipsLocalGp->Value = ElfSym<ELFT>::MipsGp->Value; } } @@ -1586,8 +1723,8 @@ template <class ELFT> void Writer<ELFT>::writeHeader() { // Write the ELF header. auto *EHdr = reinterpret_cast<Elf_Ehdr *>(Buf); - EHdr->e_ident[EI_CLASS] = ELFT::Is64Bits ? ELFCLASS64 : ELFCLASS32; - EHdr->e_ident[EI_DATA] = getELFEncoding<ELFT>(); + EHdr->e_ident[EI_CLASS] = Config->Is64 ? 
ELFCLASS64 : ELFCLASS32; + EHdr->e_ident[EI_DATA] = Config->IsLE ? ELFDATA2LSB : ELFDATA2MSB; EHdr->e_ident[EI_VERSION] = EV_CURRENT; EHdr->e_ident[EI_OSABI] = Config->OSABI; EHdr->e_type = getELFType(); @@ -1630,63 +1767,33 @@ template <class ELFT> void Writer<ELFT>::writeHeader() { // Write the section header table. Note that the first table entry is null. auto *SHdrs = reinterpret_cast<Elf_Shdr *>(Buf + EHdr->e_shoff); - for (OutputSectionBase *Sec : OutputSections) + for (OutputSection *Sec : OutputSections) Sec->writeHeaderTo<ELFT>(++SHdrs); } -// Removes a given file asynchronously. This is a performance hack, -// so remove this when operating systems are improved. -// -// On Linux (and probably on other Unix-like systems), unlink(2) is a -// noticeably slow system call. As of 2016, unlink takes 250 -// milliseconds to remove a 1 GB file on ext4 filesystem on my machine. -// -// To create a new result file, we first remove existing file. So, if -// you repeatedly link a 1 GB program in a regular compile-link-debug -// cycle, every cycle wastes 250 milliseconds only to remove a file. -// Since LLD can link a 1 GB binary in about 5 seconds, that waste -// actually counts. -// -// This function spawns a background thread to call unlink. -// The calling thread returns almost immediately. -static void unlinkAsync(StringRef Path) { - if (!Config->Threads || !sys::fs::exists(Config->OutputFile)) - return; - - // First, rename Path to avoid race condition. We cannot remove - // Path from a different thread because we are now going to create - // Path as a new file. If we do that in a different thread, the new - // thread can remove the new file. - SmallString<128> TempPath; - if (sys::fs::createUniqueFile(Path + "tmp%%%%%%%%", TempPath)) - return; - if (sys::fs::rename(Path, TempPath)) { - sys::fs::remove(TempPath); +// Open a result file. 
+template <class ELFT> void Writer<ELFT>::openFile() { + if (!Config->Is64 && FileSize > UINT32_MAX) { + error("output file too large: " + Twine(FileSize) + " bytes"); return; } - // Remove TempPath in background. - std::thread([=] { ::remove(TempPath.str().str().c_str()); }).detach(); -} - -// Open a result file. -template <class ELFT> void Writer<ELFT>::openFile() { unlinkAsync(Config->OutputFile); ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = FileOutputBuffer::create(Config->OutputFile, FileSize, FileOutputBuffer::F_executable); if (auto EC = BufferOrErr.getError()) - error(EC, "failed to open " + Config->OutputFile); + error("failed to open " + Config->OutputFile + ": " + EC.message()); else Buffer = std::move(*BufferOrErr); } template <class ELFT> void Writer<ELFT>::writeSectionsBinary() { uint8_t *Buf = Buffer->getBufferStart(); - for (OutputSectionBase *Sec : OutputSections) + for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_ALLOC) - Sec->writeTo(Buf + Sec->Offset); + Sec->writeTo<ELFT>(Buf + Sec->Offset); } // Write section contents to a mmap'ed file. @@ -1695,22 +1802,31 @@ template <class ELFT> void Writer<ELFT>::writeSections() { // PPC64 needs to process relocations in the .opd section // before processing relocations in code-containing sections. - Out<ELFT>::Opd = findSection(".opd"); - if (Out<ELFT>::Opd) { - Out<ELFT>::OpdBuf = Buf + Out<ELFT>::Opd->Offset; - Out<ELFT>::Opd->writeTo(Buf + Out<ELFT>::Opd->Offset); + Out::Opd = findSection(".opd"); + if (Out::Opd) { + Out::OpdBuf = Buf + Out::Opd->Offset; + Out::Opd->template writeTo<ELFT>(Buf + Out::Opd->Offset); } - OutputSectionBase *EhFrameHdr = + OutputSection *EhFrameHdr = In<ELFT>::EhFrameHdr ? 
In<ELFT>::EhFrameHdr->OutSec : nullptr; - for (OutputSectionBase *Sec : OutputSections) - if (Sec != Out<ELFT>::Opd && Sec != EhFrameHdr) - Sec->writeTo(Buf + Sec->Offset); + + // In -r or -emit-relocs mode, write the relocation sections first as in + // ELf_Rel targets we might find out that we need to modify the relocated + // section while doing it. + for (OutputSection *Sec : OutputSections) + if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA) + Sec->writeTo<ELFT>(Buf + Sec->Offset); + + for (OutputSection *Sec : OutputSections) + if (Sec != Out::Opd && Sec != EhFrameHdr && Sec->Type != SHT_REL && + Sec->Type != SHT_RELA) + Sec->writeTo<ELFT>(Buf + Sec->Offset); // The .eh_frame_hdr depends on .eh_frame section contents, therefore // it should be written after .eh_frame is written. - if (!Out<ELFT>::EhFrame->empty() && EhFrameHdr) - EhFrameHdr->writeTo(Buf + EhFrameHdr->Offset); + if (EhFrameHdr && !EhFrameHdr->Sections.empty()) + EhFrameHdr->writeTo<ELFT>(Buf + EhFrameHdr->Offset); } template <class ELFT> void Writer<ELFT>::writeBuildId() { @@ -1728,21 +1844,7 @@ template void elf::writeResult<ELF32BE>(); template void elf::writeResult<ELF64LE>(); template void elf::writeResult<ELF64BE>(); -template void elf::allocateHeaders<ELF32LE>(MutableArrayRef<PhdrEntry>, - ArrayRef<OutputSectionBase *>); -template void elf::allocateHeaders<ELF32BE>(MutableArrayRef<PhdrEntry>, - ArrayRef<OutputSectionBase *>); -template void elf::allocateHeaders<ELF64LE>(MutableArrayRef<PhdrEntry>, - ArrayRef<OutputSectionBase *>); -template void elf::allocateHeaders<ELF64BE>(MutableArrayRef<PhdrEntry>, - ArrayRef<OutputSectionBase *>); - -template bool elf::isRelroSection<ELF32LE>(const OutputSectionBase *); -template bool elf::isRelroSection<ELF32BE>(const OutputSectionBase *); -template bool elf::isRelroSection<ELF64LE>(const OutputSectionBase *); -template bool elf::isRelroSection<ELF64BE>(const OutputSectionBase *); - -template void 
elf::reportDiscarded<ELF32LE>(InputSectionBase<ELF32LE> *); -template void elf::reportDiscarded<ELF32BE>(InputSectionBase<ELF32BE> *); -template void elf::reportDiscarded<ELF64LE>(InputSectionBase<ELF64LE> *); -template void elf::reportDiscarded<ELF64BE>(InputSectionBase<ELF64BE> *); +template bool elf::isRelroSection<ELF32LE>(const OutputSection *); +template bool elf::isRelroSection<ELF32BE>(const OutputSection *); +template bool elf::isRelroSection<ELF64LE>(const OutputSection *); +template bool elf::isRelroSection<ELF64BE>(const OutputSection *); diff --git a/ELF/Writer.h b/ELF/Writer.h index 718e3139a809..a669e42ef205 100644 --- a/ELF/Writer.h +++ b/ELF/Writer.h @@ -18,41 +18,39 @@ namespace lld { namespace elf { class InputFile; -class OutputSectionBase; -template <class ELFT> class InputSectionBase; +class OutputSection; +class InputSectionBase; template <class ELFT> class ObjectFile; template <class ELFT> class SymbolTable; template <class ELFT> void writeResult(); template <class ELFT> void markLive(); -template <class ELFT> bool isRelroSection(const OutputSectionBase *Sec); +template <class ELFT> bool isRelroSection(const OutputSection *Sec); // This describes a program header entry. // Each contains type, access flags and range of output sections that will be // placed in it. 
struct PhdrEntry { PhdrEntry(unsigned Type, unsigned Flags); - void add(OutputSectionBase *Sec); + void add(OutputSection *Sec); uint64_t p_paddr = 0; uint64_t p_vaddr = 0; - uint64_t p_align = 0; uint64_t p_memsz = 0; uint64_t p_filesz = 0; uint64_t p_offset = 0; + uint32_t p_align = 0; uint32_t p_type = 0; uint32_t p_flags = 0; - OutputSectionBase *First = nullptr; - OutputSectionBase *Last = nullptr; + OutputSection *First = nullptr; + OutputSection *Last = nullptr; bool HasLMA = false; }; llvm::StringRef getOutputSectionName(llvm::StringRef Name); -template <class ELFT> -void allocateHeaders(llvm::MutableArrayRef<PhdrEntry>, - llvm::ArrayRef<OutputSectionBase *>); -template <class ELFT> void reportDiscarded(InputSectionBase<ELFT> *IS); +bool allocateHeaders(std::vector<PhdrEntry> &, llvm::ArrayRef<OutputSection *>, + uint64_t Min); template <class ELFT> uint32_t getMipsEFlags(); |