diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-14 18:50:02 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2022-07-14 18:50:02 +0000 |
commit | 1f917f69ff07f09b6dbb670971f57f8efe718b84 (patch) | |
tree | 99293cbc1411737cd995dac10a99b2c40ef0944c /lld | |
parent | 145449b1e420787bb99721a429341fa6be3adfb6 (diff) | |
download | src-1f917f69ff07f09b6dbb670971f57f8efe718b84.tar.gz src-1f917f69ff07f09b6dbb670971f57f8efe718b84.zip |
Vendor import of llvm-project main llvmorg-15-init-16436-g18a6ab5b8d1f. (tag: vendor/llvm-project/llvmorg-15-init-16436-g18a6ab5b8d1f)
Diffstat (limited to 'lld')
32 files changed, 816 insertions, 155 deletions
diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 8edb545cd653..dd089f5ab671 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -15,6 +15,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" #include "llvm/Support/CachePruning.h" +#include "llvm/Support/VirtualFileSystem.h" #include <cstdint> #include <map> #include <set> @@ -238,6 +239,9 @@ struct Configuration { // Used for /print-symbol-order: StringRef printSymbolOrder; + // Used for /vfsoverlay: + std::unique_ptr<llvm::vfs::FileSystem> vfs; + uint64_t align = 4096; uint64_t imageBase = -1; uint64_t fileAlign = 512; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index ffa900d42f2d..155e4ca6ee3f 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -436,17 +436,26 @@ void LinkerDriver::parseDirectives(InputFile *file) { // Find file from search paths. You can omit ".obj", this function takes // care of that. Note that the returned path is not guaranteed to exist. StringRef LinkerDriver::doFindFile(StringRef filename) { + auto getFilename = [](StringRef filename) -> StringRef { + if (config->vfs) + if (auto statOrErr = config->vfs->status(filename)) + return saver().save(statOrErr->getName()); + return filename; + }; + bool hasPathSep = (filename.find_first_of("/\\") != StringRef::npos); if (hasPathSep) - return filename; + return getFilename(filename); bool hasExt = filename.contains('.'); for (StringRef dir : searchPaths) { SmallString<128> path = dir; sys::path::append(path, filename); + path = SmallString<128>{getFilename(path.str())}; if (sys::fs::exists(path.str())) return saver().save(path.str()); if (!hasExt) { path.append(".obj"); + path = SmallString<128>{getFilename(path.str())}; if (sys::fs::exists(path.str())) return saver().save(path.str()); } @@ -1349,6 +1358,28 @@ Optional<std::string> getReproduceFile(const opt::InputArgList &args) { return None; } +static std::unique_ptr<llvm::vfs::FileSystem> +getVFS(const opt::InputArgList &args) { + using namespace 
llvm::vfs; + + const opt::Arg *arg = args.getLastArg(OPT_vfsoverlay); + if (!arg) + return nullptr; + + auto bufOrErr = llvm::MemoryBuffer::getFile(arg->getValue()); + if (!bufOrErr) { + checkError(errorCodeToError(bufOrErr.getError())); + return nullptr; + } + + if (auto ret = vfs::getVFSFromYAML(std::move(*bufOrErr), /*DiagHandler*/ nullptr, + arg->getValue())) + return ret; + + error("Invalid vfs overlay"); + return nullptr; +} + void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ScopedTimer rootTimer(ctx.rootTimer); @@ -1390,6 +1421,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { errorHandler().errorLimit = n; } + config->vfs = getVFS(args); + // Handle /help if (args.hasArg(OPT_help)) { printHelp(argsArr[0]); diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index 9f29ea0d523a..5135f4ea34af 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -278,6 +278,8 @@ def print_symbol_order: P< "/call-graph-profile-sort into the specified file">; def wrap : P_priv<"wrap">; +def vfsoverlay : P<"vfsoverlay", "Path to a vfsoverlay yaml file to optionally look for /defaultlib's in">; + // Flags for debugging def lldmap : F<"lldmap">; def lldmap_file : P_priv<"lldmap">; diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 7ba0214eb2a7..c09bb2e60786 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -7,9 +7,11 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "llvm/Support/TimeProfiler.h" using namespace llvm; using namespace llvm::object; @@ -36,6 +38,7 @@ public: const uint8_t *loc) const override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; + bool relaxOnce(int pass) const override; }; } // end anonymous namespace @@ -267,16 +270,12 @@ RelExpr RISCV::getRelExpr(const RelType type, const 
Symbol &s, case R_RISCV_TPREL_LO12_I: case R_RISCV_TPREL_LO12_S: return R_TPREL; - case R_RISCV_RELAX: case R_RISCV_TPREL_ADD: return R_NONE; case R_RISCV_ALIGN: - // Not just a hint; always padded to the worst-case number of NOPs, so may - // not currently be aligned, and without linker relaxation support we can't - // delete NOPs to realign. - errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires " - "unimplemented linker relaxation; recompile with -mno-relax"); - return R_NONE; + return R_RELAX_HINT; + case R_RISCV_RELAX: + return config->relax ? R_RELAX_HINT : R_NONE; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -301,7 +300,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { return; case R_RISCV_RVC_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 8, rel); + checkInt(loc, val, 9, rel); checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE383; uint16_t imm8 = extractBits(val, 8, 8) << 12; @@ -316,7 +315,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_RVC_JUMP: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 11, rel); + checkInt(loc, val, 12, rel); checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE003; uint16_t imm11 = extractBits(val, 11, 11) << 12; @@ -347,7 +346,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_JAL: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 20, rel); + checkInt(loc, val, 21, rel); checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0xFFF; @@ -362,7 +361,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } case R_RISCV_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 12, rel); + checkInt(loc, val, 13, rel); checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0x1FFF07F; @@ -476,6 +475,291 @@ void 
RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { } } +namespace { +struct SymbolAnchor { + uint64_t offset; + Defined *d; + bool end; // true for the anchor of st_value+st_size +}; +} // namespace + +struct elf::RISCVRelaxAux { + // This records symbol start and end offsets which will be adjusted according + // to the nearest relocDeltas element. + SmallVector<SymbolAnchor, 0> anchors; + // For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] : + // 0). + std::unique_ptr<uint32_t[]> relocDeltas; + // For relocations[i], the actual type is relocTypes[i]. + std::unique_ptr<RelType[]> relocTypes; + SmallVector<uint32_t, 0> writes; +}; + +static void initSymbolAnchors() { + SmallVector<InputSection *, 0> storage; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + sec->relaxAux = make<RISCVRelaxAux>(); + if (sec->relocations.size()) { + sec->relaxAux->relocDeltas = + std::make_unique<uint32_t[]>(sec->relocations.size()); + sec->relaxAux->relocTypes = + std::make_unique<RelType[]>(sec->relocations.size()); + } + } + } + // Store anchors (st_value and st_value+st_size) for symbols relative to text + // sections. + for (InputFile *file : ctx->objectFiles) + for (Symbol *sym : file->getSymbols()) { + auto *d = dyn_cast<Defined>(sym); + if (!d || d->file != file) + continue; + if (auto *sec = dyn_cast_or_null<InputSection>(d->section)) + if (sec->flags & SHF_EXECINSTR && sec->relaxAux) { + // If sec is discarded, relaxAux will be nullptr. + sec->relaxAux->anchors.push_back({d->value, d, false}); + sec->relaxAux->anchors.push_back({d->value + d->size, d, true}); + } + } + // Sort anchors by offset so that we can find the closest relocation + // efficiently. For a zero size symbol, ensure that its start anchor precedes + // its end anchor. For two symbols with anchors at the same offset, their + // order does not matter. 
+ for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + llvm::sort(sec->relaxAux->anchors, [](auto &a, auto &b) { + return std::make_pair(a.offset, a.end) < + std::make_pair(b.offset, b.end); + }); + } + } +} + +// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal. +static void relaxCall(const InputSection &sec, size_t i, uint64_t loc, + Relocation &r, uint32_t &remove) { + const bool rvc = config->eflags & EF_RISCV_RVC; + const Symbol &sym = *r.sym; + const uint64_t insnPair = read64le(sec.rawData.data() + r.offset); + const uint32_t rd = extractBits(insnPair, 32 + 11, 32 + 7); + const uint64_t dest = + (r.expr == R_PLT_PC ? sym.getPltVA() : sym.getVA()) + r.addend; + const int64_t displace = dest - loc; + + if (rvc && isInt<12>(displace) && rd == 0) { + sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP; + sec.relaxAux->writes.push_back(0xa001); // c.j + remove = 6; + } else if (rvc && isInt<12>(displace) && rd == X_RA && + !config->is64) { // RV32C only + sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP; + sec.relaxAux->writes.push_back(0x2001); // c.jal + remove = 6; + } else if (isInt<21>(displace)) { + sec.relaxAux->relocTypes[i] = R_RISCV_JAL; + sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal + remove = 4; + } +} + +static bool relax(InputSection &sec) { + const uint64_t secAddr = sec.getVA(); + auto &aux = *sec.relaxAux; + bool changed = false; + + // Get st_value delta for symbols relative to this section from the previous + // iteration. 
+ DenseMap<const Defined *, uint64_t> valueDelta; + ArrayRef<SymbolAnchor> sa = makeArrayRef(aux.anchors); + uint32_t delta = 0; + for (auto it : llvm::enumerate(sec.relocations)) { + for (; sa.size() && sa[0].offset <= it.value().offset; sa = sa.slice(1)) + if (!sa[0].end) + valueDelta[sa[0].d] = delta; + delta = aux.relocDeltas[it.index()]; + } + for (const SymbolAnchor &sa : sa) + if (!sa.end) + valueDelta[sa.d] = delta; + sa = makeArrayRef(aux.anchors); + delta = 0; + + std::fill_n(aux.relocTypes.get(), sec.relocations.size(), R_RISCV_NONE); + aux.writes.clear(); + for (auto it : llvm::enumerate(sec.relocations)) { + Relocation &r = it.value(); + const size_t i = it.index(); + const uint64_t loc = secAddr + r.offset - delta; + uint32_t &cur = aux.relocDeltas[i], remove = 0; + switch (r.type) { + case R_RISCV_ALIGN: { + const uint64_t nextLoc = loc + r.addend; + const uint64_t align = PowerOf2Ceil(r.addend + 2); + // All bytes beyond the alignment boundary should be removed. + remove = nextLoc - ((loc + align - 1) & -align); + assert(static_cast<int32_t>(remove) >= 0 && + "R_RISCV_ALIGN needs expanding the content"); + break; + } + case R_RISCV_CALL: + case R_RISCV_CALL_PLT: + if (i + 1 != sec.relocations.size() && + sec.relocations[i + 1].type == R_RISCV_RELAX) + relaxCall(sec, i, loc, r, remove); + break; + } + + // For all anchors whose offsets are <= r.offset, they are preceded by + // the previous relocation whose `relocDeltas` value equals `delta`. + // Decrease their st_value and update their st_size. 
+ for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) { + if (sa[0].end) + sa[0].d->size = sa[0].offset - delta - sa[0].d->value; + else + sa[0].d->value -= delta - valueDelta.find(sa[0].d)->second; + } + delta += remove; + if (delta != cur) { + cur = delta; + changed = true; + } + } + + for (const SymbolAnchor &a : sa) { + if (a.end) + a.d->size = a.offset - delta - a.d->value; + else + a.d->value -= delta - valueDelta.find(a.d)->second; + } + // Inform assignAddresses that the size has changed. + if (!isUInt<16>(delta)) + fatal("section size decrease is too large"); + sec.bytesDropped = delta; + return changed; +} + +// When relaxing just R_RISCV_ALIGN, relocDeltas is usually changed only once in +// the absence of a linker script. For call and load/store R_RISCV_RELAX, code +// shrinkage may reduce displacement and make more relocations eligible for +// relaxation. Code shrinkage may increase displacement to a call/load/store +// target at a higher fixed address, invalidating an earlier relaxation. Any +// change in section sizes can have cascading effect and require another +// relaxation pass. 
+bool RISCV::relaxOnce(int pass) const { + llvm::TimeTraceScope timeScope("RISC-V relaxOnce"); + if (config->relocatable) + return false; + + if (pass == 0) + initSymbolAnchors(); + + SmallVector<InputSection *, 0> storage; + bool changed = false; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) + changed |= relax(*sec); + } + return changed; +} + +void elf::riscvFinalizeRelax(int passes) { + llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation"); + log("relaxation passes: " + Twine(passes)); + SmallVector<InputSection *, 0> storage; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + RISCVRelaxAux &aux = *sec->relaxAux; + if (!aux.relocDeltas) + continue; + + auto &rels = sec->relocations; + ArrayRef<uint8_t> old = sec->rawData; + size_t newSize = + old.size() - aux.relocDeltas[sec->relocations.size() - 1]; + size_t writesIdx = 0; + uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize); + uint64_t offset = 0; + int64_t delta = 0; + sec->rawData = makeArrayRef(p, newSize); + sec->bytesDropped = 0; + + // Update section content: remove NOPs for R_RISCV_ALIGN and rewrite + // instructions for relaxed relocations. + for (size_t i = 0, e = rels.size(); i != e; ++i) { + uint32_t remove = aux.relocDeltas[i] - delta; + delta = aux.relocDeltas[i]; + if (remove == 0) + continue; + + // Copy from last location to the current relocated location. + const Relocation &r = rels[i]; + uint64_t size = r.offset - offset; + memcpy(p, old.data() + offset, size); + p += size; + + // For R_RISCV_ALIGN, we will place `offset` in a location (among NOPs) + // to satisfy the alignment requirement. If `remove` is a multiple of 4, + // it is as if we have skipped some NOPs. Otherwise we are in the middle + // of a 4-byte NOP, and we need to rewrite the NOP sequence. 
+ int64_t skip = 0; + if (r.type == R_RISCV_ALIGN) { + if (remove % 4 != 0) { + skip = r.addend - remove; + int64_t j = 0; + for (; j + 4 <= skip; j += 4) + write32le(p + j, 0x00000013); // nop + if (j != skip) { + assert(j + 2 == skip); + write16le(p + j, 0x0001); // c.nop + } + } + } else if (RelType newType = aux.relocTypes[i]) { + const uint32_t insn = aux.writes[writesIdx++]; + switch (newType) { + case R_RISCV_RVC_JUMP: + skip = 2; + write16le(p, insn); + break; + case R_RISCV_JAL: + skip = 4; + write32le(p, insn); + break; + default: + llvm_unreachable("unsupported type"); + } + } + + p += skip; + offset = r.offset + skip + remove; + } + memcpy(p, old.data() + offset, old.size() - offset); + + // Subtract the previous relocDeltas value from the relocation offset. + // For a pair of R_RISCV_CALL/R_RISCV_RELAX with the same offset, decrease + // their r_offset by the same delta. + delta = 0; + for (size_t i = 0, e = rels.size(); i != e;) { + uint64_t cur = rels[i].offset; + do { + rels[i].offset -= delta; + if (aux.relocTypes[i] != R_RISCV_NONE) + rels[i].type = aux.relocTypes[i]; + } while (++i != e && rels[i].offset == cur); + delta = aux.relocDeltas[i - 1]; + } + } + } +} + TargetInfo *elf::getRISCVTargetInfo() { static RISCV target; return &target; diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index c593880d5cd3..39723f092784 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -11,6 +11,7 @@ #include "lld/Common/ErrorHandler.h" #include "llvm/ADT/CachedHashString.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" @@ -215,7 +216,7 @@ struct Configuration { bool relocatable; bool relrGlibc = false; bool relrPackDynRelocs = false; - bool saveTemps; + llvm::DenseSet<llvm::StringRef> saveTempsArgs; std::vector<std::pair<llvm::GlobPattern, uint32_t>> shuffleSections; bool singleRoRx; bool shared; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7500b68a9bef..4c26cba1cb4f
100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -500,6 +500,10 @@ static void checkZOptions(opt::InputArgList &args) { warn("unknown -z value: " + StringRef(arg->getValue())); } +constexpr const char *saveTempsValues[] = { + "resolution", "preopt", "promote", "internalize", "import", + "opt", "precodegen", "prelink", "combinedindex"}; + void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ELFOptTable parser; opt::InputArgList args = parser.parse(argsArr.slice(1)); @@ -947,7 +951,7 @@ static bool getCompressDebugSections(opt::InputArgList &args) { return false; if (s != "zlib") error("unknown --compress-debug-sections value: " + s); - if (!zlib::isAvailable()) + if (!compression::zlib::isAvailable()) error("--compress-debug-sections: zlib is not available"); return true; } @@ -1151,7 +1155,21 @@ static void readConfigs(opt::InputArgList &args) { config->relax = args.hasFlag(OPT_relax, OPT_no_relax, true); config->rpath = getRpath(args); config->relocatable = args.hasArg(OPT_relocatable); - config->saveTemps = args.hasArg(OPT_save_temps); + + if (args.hasArg(OPT_save_temps)) { + // --save-temps implies saving all temps. + for (const char *s : saveTempsValues) + config->saveTempsArgs.insert(s); + } else { + for (auto *arg : args.filtered(OPT_save_temps_eq)) { + StringRef s = arg->getValue(); + if (llvm::is_contained(saveTempsValues, s)) + config->saveTempsArgs.insert(s); + else + error("unknown --save-temps value: " + s); + } + } + config->searchPaths = args::getStrings(args, OPT_library_path); config->sectionStartMap = getSectionStartMap(args); config->shared = args.hasArg(OPT_shared); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 97fc18b58244..8fe36eca6a4b 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -72,12 +72,8 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, // If SHF_COMPRESSED is set, parse the header. The legacy .zdebug format is no // longer supported. 
- if (flags & SHF_COMPRESSED) { - if (!zlib::isAvailable()) - error(toString(file) + ": contains a compressed section, " + - "but zlib is not available"); + if (flags & SHF_COMPRESSED) invokeELFT(parseCompressedHeader); - } } // Drop SHF_GROUP bit unless we are producing a re-linkable object file. @@ -115,17 +111,17 @@ size_t InputSectionBase::getSize() const { void InputSectionBase::uncompress() const { size_t size = uncompressedSize; - char *uncompressedBuf; + uint8_t *uncompressedBuf; { static std::mutex mu; std::lock_guard<std::mutex> lock(mu); - uncompressedBuf = bAlloc().Allocate<char>(size); + uncompressedBuf = bAlloc().Allocate<uint8_t>(size); } - if (Error e = zlib::uncompress(toStringRef(rawData), uncompressedBuf, size)) + if (Error e = compression::zlib::uncompress(rawData, uncompressedBuf, size)) fatal(toString(this) + ": uncompress failed: " + llvm::toString(std::move(e))); - rawData = makeArrayRef((uint8_t *)uncompressedBuf, size); + rawData = makeArrayRef(uncompressedBuf, size); uncompressedSize = -1; } @@ -211,8 +207,13 @@ template <typename ELFT> void InputSectionBase::parseCompressedHeader() { } auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(rawData.data()); - if (hdr->ch_type != ELFCOMPRESS_ZLIB) { - error(toString(this) + ": unsupported compression type"); + if (hdr->ch_type == ELFCOMPRESS_ZLIB) { + if (!compression::zlib::isAvailable()) + error(toString(this) + " is compressed with ELFCOMPRESS_ZLIB, but lld is " + "not built with zlib support"); + } else { + error(toString(this) + ": unsupported compression type (" + + Twine(hdr->ch_type) + ")"); return; } @@ -622,6 +623,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return sym.getVA(a); case R_ADDEND: return a; + case R_RELAX_HINT: + return 0; case R_ARM_SBREL: return sym.getVA(a) - getARMStaticBase(sym); case R_GOT: @@ -987,6 +990,8 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { *rel.sym, rel.expr), bits); switch 
(rel.expr) { + case R_RELAX_HINT: + continue; case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: target.relaxGot(bufLoc, rel, targetVA); @@ -1213,7 +1218,7 @@ template <class ELFT> void InputSection::writeTo(uint8_t *buf) { // to the buffer. if (uncompressedSize >= 0) { size_t size = uncompressedSize; - if (Error e = zlib::uncompress(toStringRef(rawData), (char *)buf, size)) + if (Error e = compression::zlib::uncompress(rawData, buf, size)) fatal(toString(this) + ": uncompress failed: " + llvm::toString(std::move(e))); uint8_t *bufEnd = buf + size; diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index c7c8f45f432d..d1b889750bbd 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -10,7 +10,9 @@ #define LLD_ELF_INPUT_SECTION_H #include "Relocations.h" +#include "lld/Common/CommonLinkerContext.h" #include "lld/Common/LLVM.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/TinyPtrVector.h" @@ -97,6 +99,8 @@ protected: link(link), info(info) {} }; +struct RISCVRelaxAux; + // This corresponds to a section of an input file. class InputSectionBase : public SectionBase { public: @@ -129,11 +133,10 @@ public: return cast_or_null<ObjFile<ELFT>>(file); } - // If basic block sections are enabled, many code sections could end up with - // one or two jump instructions at the end that could be relaxed to a smaller - // instruction. The members below help trimming the trailing jump instruction - // and shrinking a section. - uint8_t bytesDropped = 0; + // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to + // indicate the number of bytes which is not counted in the size. This should + // be reset to zero after uses. + uint16_t bytesDropped = 0; // Whether the section needs to be padded with a NOP filler due to // deleteFallThruJmpInsn. @@ -201,11 +204,17 @@ public: // This vector contains such "cooked" relocations. 
SmallVector<Relocation, 0> relocations; - // These are modifiers to jump instructions that are necessary when basic - // block sections are enabled. Basic block sections creates opportunities to - // relax jump instructions at basic block boundaries after reordering the - // basic blocks. - JumpInstrMod *jumpInstrMod = nullptr; + union { + // These are modifiers to jump instructions that are necessary when basic + // block sections are enabled. Basic block sections creates opportunities + // to relax jump instructions at basic block boundaries after reordering the + // basic blocks. + JumpInstrMod *jumpInstrMod = nullptr; + + // Auxiliary information for RISC-V linker relaxation. RISC-V does not use + // jumpInstrMod. + RISCVRelaxAux *relaxAux; + }; // A function compiled with -fsplit-stack calling a function // compiled without -fsplit-stack needs its prologue adjusted. Find diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index e44ef0d3c2c8..8c5001af3a91 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -178,9 +178,10 @@ static lto::Config createConfig() { if (config->ltoEmitAsm) c.CGFileType = CGFT_AssemblyFile; - if (config->saveTemps) + if (!config->saveTempsArgs.empty()) checkError(c.addSaveTemps(config->outputFile.str() + ".", - /*UseInputModulePath*/ true)); + /*UseInputModulePath*/ true, + config->saveTempsArgs)); return c; } @@ -365,7 +366,7 @@ std::vector<InputFile *> BitcodeCompiler::compile() { saveBuffer(buf[i], config->ltoObjPath + Twine(i)); } - if (config->saveTemps) { + if (config->saveTempsArgs.contains("prelink")) { if (!buf[0].empty()) saveBuffer(buf[0], config->outputFile + ".lto.o"); for (unsigned i = 1; i != maxTasks; ++i) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 1d9fbcbcee3c..c98d21717de0 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -585,6 +585,8 @@ def opt_remarks_with_hotness: FF<"opt-remarks-with-hotness">, def opt_remarks_format: Separate<["--"], "opt-remarks-format">, HelpText<"The format used for 
serializing remarks (default: YAML)">; def save_temps: F<"save-temps">, HelpText<"Save intermediate LTO compilation results">; +def save_temps_eq: JJ<"save-temps=">, HelpText<"Save select intermediate LTO compilation results">, + Values<"resolution,preopt,promote,internalize,import,opt,precodegen,prelink,combinedindex">; def lto_basic_block_sections: JJ<"lto-basic-block-sections=">, HelpText<"Enable basic block sections for LTO">; defm lto_unique_basic_block_section_names: BB<"lto-unique-basic-block-section-names", diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index bc940c7e6546..cbde8ac800d3 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -419,7 +419,8 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { } // Write leading padding. - SmallVector<InputSection *, 0> sections = getInputSections(*this); + SmallVector<InputSection *, 0> storage; + ArrayRef<InputSection *> sections = getInputSections(*this, storage); std::array<uint8_t, 4> filler = getFiller(); bool nonZeroFiller = read32(filler.data()) != 0; if (nonZeroFiller) @@ -592,12 +593,24 @@ InputSection *elf::getFirstInputSection(const OutputSection *os) { return nullptr; } -SmallVector<InputSection *, 0> elf::getInputSections(const OutputSection &os) { - SmallVector<InputSection *, 0> ret; - for (SectionCommand *cmd : os.commands) - if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) - ret.insert(ret.end(), isd->sections.begin(), isd->sections.end()); - return ret; +ArrayRef<InputSection *> +elf::getInputSections(const OutputSection &os, + SmallVector<InputSection *, 0> &storage) { + ArrayRef<InputSection *> ret; + storage.clear(); + for (SectionCommand *cmd : os.commands) { + auto *isd = dyn_cast<InputSectionDescription>(cmd); + if (!isd) + continue; + if (ret.empty()) { + ret = isd->sections; + } else { + if (storage.empty()) + storage.assign(ret.begin(), ret.end()); + storage.insert(storage.end(), isd->sections.begin(), 
isd->sections.end()); + } + } + return storage.empty() ? ret : makeArrayRef(storage); } // Sorts input sections by section name suffixes, so that .foo.N comes @@ -622,7 +635,8 @@ std::array<uint8_t, 4> OutputSection::getFiller() { void OutputSection::checkDynRelAddends(const uint8_t *bufStart) { assert(config->writeAddends && config->checkDynamicRelocs); assert(type == SHT_REL || type == SHT_RELA); - SmallVector<InputSection *, 0> sections = getInputSections(*this); + SmallVector<InputSection *, 0> storage; + ArrayRef<InputSection *> sections = getInputSections(*this, storage); parallelFor(0, sections.size(), [&](size_t i) { // When linking with -r or --emit-relocs we might also call this function // for input .rel[a].<sec> sections which we simply pass through to the diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index 020eeaec368e..6bdbcfce7b12 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -134,7 +134,9 @@ struct OutputDesc final : SectionCommand { int getPriority(StringRef s); InputSection *getFirstInputSection(const OutputSection *os); -SmallVector<InputSection *, 0> getInputSections(const OutputSection &os); +llvm::ArrayRef<InputSection *> +getInputSections(const OutputSection &os, + SmallVector<InputSection *, 0> &storage); // All output sections that are handled by the linker specially are // globally accessible. 
Writer initializes them, so don't use them diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 64381ae75414..e54e1ebd41bb 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -958,8 +958,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, uint64_t relOff) const { // These expressions always compute a constant - if (oneof<R_GOTPLT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOTREL, - R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, + if (oneof<R_GOTPLT, R_GOT_OFF, R_RELAX_HINT, R_MIPS_GOT_LOCAL_PAGE, + R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e)) @@ -2118,7 +2118,9 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { // made no changes. If the target requires range extension thunks, currently // ARM, then any future change in offset between caller and callee risks a // relocation out of range error. 
-bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { +bool ThunkCreator::createThunks(uint32_t pass, + ArrayRef<OutputSection *> outputSections) { + this->pass = pass; bool addressesChanged = false; if (pass == 0 && target->getThunkSectionSpacing()) @@ -2180,7 +2182,6 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { // Merge all created synthetic ThunkSections back into OutputSection mergeThunks(outputSections); - ++pass; return addressesChanged; } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index da0f2289bc90..f70d255ba229 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -46,6 +46,7 @@ enum RelExpr { R_PLT, R_PLT_PC, R_PLT_GOTPLT, + R_RELAX_HINT, R_RELAX_GOT_PC, R_RELAX_GOT_PC_NOPIC, R_RELAX_TLS_GD_TO_IE, @@ -139,12 +140,7 @@ class InputSectionDescription; class ThunkCreator { public: // Return true if Thunks have been added to OutputSections - bool createThunks(ArrayRef<OutputSection *> outputSections); - - // The number of completed passes of createThunks this permits us - // to do one time initialization on Pass 0 and put a limit on the - // number of times it can be called to prevent infinite loops. - uint32_t pass = 0; + bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections); private: void mergeThunks(ArrayRef<OutputSection *> outputSections); @@ -186,6 +182,11 @@ private: // so we need to make sure that there is only one of them. // The Mips LA25 Thunk is an example of an inline ThunkSection. llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections; + + // The number of completed passes of createThunks this permits us + // to do one time initialization on Pass 0 and put a limit on the + // number of times it can be called to prevent infinite loops. 
+ uint32_t pass = 0; }; // Return a int64_t to make sure we get the sign extension out of the way as diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 5e9bbd62572d..14b1f53c6a81 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -89,6 +89,9 @@ public: relocate(loc, Relocation{R_NONE, type, 0, 0, nullptr}, val); } + // Do a linker relaxation pass and return true if we changed something. + virtual bool relaxOnce(int pass) const { return false; } + virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type, JumpModType val) const {} @@ -221,6 +224,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn); void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); +void riscvFinalizeRelax(int passes); class AArch64Relaxer { bool safeToRelaxAdrpLdr = true; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index d6172edc76f2..738eb24f2200 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -916,25 +916,18 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) { write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset> } else if (isInt<34>(offset)) { int nextInstOffset; - if (!config->power10Stubs) { - uint64_t tocOffset = destination.getVA() - getPPC64TocBase(); - if (tocOffset >> 16 > 0) { - const uint64_t addi = ADDI_R12_TO_R12_NO_DISP | (tocOffset & 0xffff); - const uint64_t addis = ADDIS_R12_TO_R2_NO_DISP | ((tocOffset >> 16) & 0xffff); - write32(buf + 4, addis); // addis r12, r2 , top of offset - write32(buf + 8, addi); // addi r12, r12, bottom of offset - nextInstOffset = 12; - } else { - const uint64_t addi = ADDI_R12_TO_R2_NO_DISP | (tocOffset & 0xffff); - write32(buf + 4, addi); // addi r12, r2, offset - nextInstOffset = 8; - } - } else { - const uint64_t paddi = PADDI_R12_NO_DISP | - (((offset >> 16) & 0x3ffff) << 32) | - (offset & 0xffff); - writePrefixedInstruction(buf + 4, paddi); // paddi r12, 0, func@pcrel, 1 + uint64_t tocOffset = destination.getVA() - getPPC64TocBase(); + if (tocOffset 
>> 16 > 0) { + const uint64_t addi = ADDI_R12_TO_R12_NO_DISP | (tocOffset & 0xffff); + const uint64_t addis = + ADDIS_R12_TO_R2_NO_DISP | ((tocOffset >> 16) & 0xffff); + write32(buf + 4, addis); // addis r12, r2 , top of offset + write32(buf + 8, addi); // addi r12, r12, bottom of offset nextInstOffset = 12; + } else { + const uint64_t addi = ADDI_R12_TO_R2_NO_DISP | (tocOffset & 0xffff); + write32(buf + 4, addi); // addi r12, r2, offset + nextInstOffset = 8; } write32(buf + nextInstOffset, MTCTR_R12); // mtctr r12 write32(buf + nextInstOffset + 4, BCTR); // bctr diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 7bfe29eda695..705cc7bf9766 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1312,12 +1312,16 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() { // Sorts the sections in ISD according to the provided section order. static void sortISDBySectionOrder(InputSectionDescription *isd, - const DenseMap<const InputSectionBase *, int> &order) { + const DenseMap<const InputSectionBase *, int> &order, + bool executableOutputSection) { SmallVector<InputSection *, 0> unorderedSections; SmallVector<std::pair<InputSection *, int>, 0> orderedSections; uint64_t unorderedSize = 0; + uint64_t totalSize = 0; for (InputSection *isec : isd->sections) { + if (executableOutputSection) + totalSize += isec->getSize(); auto i = order.find(isec); if (i == order.end()) { unorderedSections.push_back(isec); @@ -1355,8 +1359,15 @@ sortISDBySectionOrder(InputSectionDescription *isd, // of the second block of cold code can call the hot code without a thunk. So // we effectively double the amount of code that could potentially call into // the hot code without a thunk. + // + // The above is not necessary if total size of input sections in this "isd" + // is small. Note that we assume all input sections are executable if the + // output section is executable (which is not always true but supposed to + // cover most cases). 
size_t insPt = 0; - if (target->getThunkSectionSpacing() && !orderedSections.empty()) { + if (executableOutputSection && !orderedSections.empty() && + target->getThunkSectionSpacing() && + totalSize >= target->getThunkSectionSpacing()) { uint64_t unorderedPos = 0; for (; insPt != unorderedSections.size(); ++insPt) { unorderedPos += unorderedSections[insPt]->getSize(); @@ -1397,7 +1408,7 @@ static void sortSection(OutputSection &osec, if (!order.empty()) for (SectionCommand *b : osec.commands) if (auto *isd = dyn_cast<InputSectionDescription>(b)) - sortISDBySectionOrder(isd, order); + sortISDBySectionOrder(isd, order, osec.flags & SHF_EXECINSTR); if (script->hasSectionsCommand) return; @@ -1630,14 +1641,17 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { if (config->emachine == EM_HEXAGON) hexagonTLSSymbolUpdate(outputSections); - int assignPasses = 0; + uint32_t pass = 0, assignPasses = 0; for (;;) { - bool changed = target->needsThunks && tc.createThunks(outputSections); + bool changed = target->needsThunks ? tc.createThunks(pass, outputSections) + : target->relaxOnce(pass); + ++pass; // With Thunk Size much smaller than branch range we expect to // converge quickly; if we get to 15 something has gone wrong. - if (changed && tc.pass >= 15) { - error("thunk creation not converged"); + if (changed && pass >= 15) { + error(target->needsThunks ? "thunk creation not converged" + : "relaxation not converged"); break; } @@ -1675,6 +1689,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { } } } + if (!config->relocatable && config->emachine == EM_RISCV) + riscvFinalizeRelax(pass); if (config->relocatable) for (OutputSection *sec : outputSections) @@ -1741,6 +1757,7 @@ static void fixSymbolsAfterShrinking() { // option is used. 
template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() { assert(config->optimizeBBJumps); + SmallVector<InputSection *, 0> storage; script->assignAddresses(); // For every output section that has executable input sections, this @@ -1752,7 +1769,7 @@ template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() { for (OutputSection *osec : outputSections) { if (!(osec->flags & SHF_EXECINSTR)) continue; - SmallVector<InputSection *, 0> sections = getInputSections(*osec); + ArrayRef<InputSection *> sections = getInputSections(*osec, storage); size_t numDeleted = 0; // Delete all fall through jump instructions. Also, check if two // consecutive jump instructions can be flipped so that a fall @@ -1772,7 +1789,7 @@ template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() { fixSymbolsAfterShrinking(); for (OutputSection *osec : outputSections) - for (InputSection *is : getInputSections(*osec)) + for (InputSection *is : getInputSections(*osec, storage)) is->trim(); } @@ -2165,9 +2182,10 @@ template <class ELFT> void Writer<ELFT>::checkExecuteOnly() { if (!config->executeOnly) return; + SmallVector<InputSection *, 0> storage; for (OutputSection *osec : outputSections) if (osec->flags & SHF_EXECINSTR) - for (InputSection *isec : getInputSections(*osec)) + for (InputSection *isec : getInputSections(*osec, storage)) if (!(isec->flags & SHF_EXECINSTR)) error("cannot place " + toString(isec) + " into " + toString(osec->name) + diff --git a/lld/MachO/Arch/ARM.cpp b/lld/MachO/Arch/ARM.cpp index 7de0837fcf38..fd215ed99b59 100644 --- a/lld/MachO/Arch/ARM.cpp +++ b/lld/MachO/Arch/ARM.cpp @@ -40,6 +40,9 @@ struct ARM : TargetInfo { void relaxGotLoad(uint8_t *loc, uint8_t type) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } + + void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const override; }; } // namespace @@ -170,3 +173,36 @@ TargetInfo 
*macho::createARMTargetInfo(uint32_t cpuSubtype) { static ARM t(cpuSubtype); return &t; } + +void ARM::handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + if (config->outputType == MH_OBJECT) + return; + + switch (r.type) { + case ARM_RELOC_BR24: + if (sym->getName().startswith("___dtrace_probe")) { + // change call site to a NOP + write32le(loc, 0xE1A00000); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change call site to 'eor r0, r0, r0' + write32le(loc, 0xE0200000); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } + break; + case ARM_THUMB_RELOC_BR22: + if (sym->getName().startswith("___dtrace_probe")) { + // change 32-bit blx call site to two thumb NOPs + write32le(loc, 0x46C046C0); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change 32-bit blx call site to 'nop', 'eor r0, r0' + write32le(loc, 0x46C04040); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } + break; + default: + llvm_unreachable("Unsupported dtrace relocation type for ARM"); + } +} diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp index 7064df5793aa..5901a9e09b35 100644 --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -168,10 +168,10 @@ enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 }; struct Ldr { uint8_t destRegister; uint8_t baseRegister; - uint8_t size; + uint8_t p2Size; bool isFloat; ExtendType extendType; - uint64_t offset; + int64_t offset; }; struct PerformedReloc { @@ -189,6 +189,8 @@ public: void applyAdrpAdd(const OptimizationHint &); void applyAdrpAdrp(const OptimizationHint &); void applyAdrpLdr(const OptimizationHint &); + void applyAdrpLdrGot(const OptimizationHint &); + void applyAdrpLdrGotLdr(const OptimizationHint &); private: uint8_t *buf; @@ -227,34 +229,35 @@ static bool parseLdr(uint32_t insn, Ldr &ldr) { if ((insn & 0x3fc00000) == 0x39400000) { // LDR (immediate), LDRB (immediate), LDRH (immediate) 
- ldr.size = 1 << size; + ldr.p2Size = size; ldr.extendType = ZeroExtend; ldr.isFloat = false; } else if ((insn & 0x3f800000) == 0x39800000) { // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate) - ldr.size = 1 << size; + ldr.p2Size = size; ldr.extendType = static_cast<ExtendType>(opc); ldr.isFloat = false; } else if ((insn & 0x3f400000) == 0x3d400000) { // LDR (immediate, SIMD&FP) ldr.extendType = ZeroExtend; ldr.isFloat = true; - if (size == 2 && opc == 1) - ldr.size = 4; - else if (size == 3 && opc == 1) - ldr.size = 8; + if (opc == 1) + ldr.p2Size = size; else if (size == 0 && opc == 3) - ldr.size = 16; + ldr.p2Size = 4; else return false; } else { return false; } - ldr.offset = ((insn >> 10) & 0xfff) * ldr.size; + ldr.offset = ((insn >> 10) & 0xfff) << ldr.p2Size; return true; } +static bool isValidAdrOffset(int32_t delta) { return isInt<21>(delta); } + static void writeAdr(void *loc, uint32_t dest, int32_t delta) { + assert(isValidAdrOffset(delta)); uint32_t opcode = 0x10000000; uint32_t immHi = (delta & 0x001ffffc) << 3; uint32_t immLo = (delta & 0x00000003) << 29; @@ -263,26 +266,63 @@ static void writeAdr(void *loc, uint32_t dest, int32_t delta) { static void writeNop(void *loc) { write32le(loc, 0xd503201f); } -static void writeLiteralLdr(void *loc, Ldr original, int32_t delta) { - uint32_t imm19 = (delta & 0x001ffffc) << 3; - uint32_t opcode = 0; - switch (original.size) { - case 4: - if (original.isFloat) +static bool isLiteralLdrEligible(const Ldr &ldr) { + return ldr.p2Size > 1 && isShiftedInt<19, 2>(ldr.offset); +} + +static void writeLiteralLdr(void *loc, const Ldr &ldr) { + assert(isLiteralLdrEligible(ldr)); + uint32_t imm19 = (ldr.offset / 4 & maskTrailingOnes<uint32_t>(19)) << 5; + uint32_t opcode; + switch (ldr.p2Size) { + case 2: + if (ldr.isFloat) opcode = 0x1c000000; else - opcode = original.extendType == Sign64 ? 0x98000000 : 0x18000000; + opcode = ldr.extendType == Sign64 ? 
0x98000000 : 0x18000000; break; - case 8: - opcode = original.isFloat ? 0x5c000000 : 0x58000000; + case 3: + opcode = ldr.isFloat ? 0x5c000000 : 0x58000000; break; - case 16: + case 4: opcode = 0x9c000000; break; default: - assert(false && "Invalid size for literal ldr"); + llvm_unreachable("Invalid literal ldr size"); } - write32le(loc, opcode | imm19 | original.destRegister); + write32le(loc, opcode | imm19 | ldr.destRegister); +} + +static bool isImmediateLdrEligible(const Ldr &ldr) { + // Note: We deviate from ld64's behavior, which converts to immediate loads + // only if ldr.offset < 4096, even though the offset is divided by the load's + // size in the 12-bit immediate operand. Only the unsigned offset variant is + // supported. + + uint32_t size = 1 << ldr.p2Size; + return ldr.offset >= 0 && (ldr.offset % size) == 0 && + isUInt<12>(ldr.offset >> ldr.p2Size); +} + +static void writeImmediateLdr(void *loc, const Ldr &ldr) { + assert(isImmediateLdrEligible(ldr)); + uint32_t opcode = 0x39000000; + if (ldr.isFloat) { + opcode |= 0x04000000; + assert(ldr.extendType == ZeroExtend); + } + opcode |= ldr.destRegister; + opcode |= ldr.baseRegister << 5; + uint8_t size, opc; + if (ldr.p2Size == 4) { + size = 0; + opc = 3; + } else { + opc = ldr.extendType; + size = ldr.p2Size; + } + uint32_t immBits = ldr.offset >> ldr.p2Size; + write32le(loc, opcode | (immBits << 10) | (opc << 22) | (size << 30)); } uint64_t OptimizationHintContext::getRelocTarget(const Reloc &reloc) { @@ -351,7 +391,7 @@ void OptimizationHintContext::applyAdrpAdd(const OptimizationHint &hint) { if (rel1->referentVA != rel2->referentVA) return; int64_t delta = rel1->referentVA - rel1->rel.offset - isec->getVA(); - if (delta >= (1 << 20) || delta < -(1 << 20)) + if (!isValidAdrOffset(delta)) return; writeAdr(buf + hint.offset0, add.destRegister, delta); @@ -412,16 +452,120 @@ void OptimizationHintContext::applyAdrpLdr(const OptimizationHint &hint) { return; if (ldr.offset != (rel1->referentVA & 0xfff)) 
return; - if ((rel1->referentVA & 3) != 0) + ldr.offset = rel1->referentVA - rel2->rel.offset - isec->getVA(); + if (!isLiteralLdrEligible(ldr)) return; - if (ldr.size == 1 || ldr.size == 2) + + writeNop(buf + hint.offset0); + writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr); +} + +// GOT loads are emitted by the compiler as a pair of adrp and ldr instructions, +// but they may be changed to adrp+add by relaxGotLoad(). This hint performs +// the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed. +void OptimizationHintContext::applyAdrpLdrGot(const OptimizationHint &hint) { + uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); + Add add; + Ldr ldr; + if (parseAdd(ins2, add)) + applyAdrpAdd(hint); + else if (parseLdr(ins2, ldr)) + applyAdrpLdr(hint); +} + +// Relaxes a GOT-indirect load. +// If the referenced symbol is external and its GOT entry is within +/- 1 MiB, +// the GOT entry can be loaded with a single literal ldr instruction. +// If the referenced symbol is local, its address may be loaded directly if it's +// close enough, or with an adr(p) + ldr pair if it's not. 
+void OptimizationHintContext::applyAdrpLdrGotLdr(const OptimizationHint &hint) { + uint32_t ins1 = read32le(buf + hint.offset0); + Adrp adrp; + if (!parseAdrp(ins1, adrp)) return; - int64_t delta = rel1->referentVA - rel2->rel.offset - isec->getVA(); - if (delta >= (1 << 20) || delta < -(1 << 20)) + uint32_t ins3 = read32le(buf + hint.offset0 + hint.delta[1]); + Ldr ldr3; + if (!parseLdr(ins3, ldr3)) return; + uint32_t ins2 = read32le(buf + hint.offset0 + hint.delta[0]); + Ldr ldr2; + Add add2; - writeNop(buf + hint.offset0); - writeLiteralLdr(buf + hint.offset0 + hint.delta[0], ldr, delta); + Optional<PerformedReloc> rel1 = findPrimaryReloc(hint.offset0); + Optional<PerformedReloc> rel2 = findReloc(hint.offset0 + hint.delta[0]); + if (!rel1 || !rel2) + return; + + if (parseAdd(ins2, add2)) { + // adrp x0, _foo@PAGE + // add x1, x0, _foo@PAGEOFF + // ldr x2, [x1, #off] + + if (adrp.destRegister != add2.srcRegister) + return; + if (add2.destRegister != ldr3.baseRegister) + return; + + // Load from the target address directly. + // nop + // nop + // ldr x2, [_foo + #off] + uint64_t rel3VA = hint.offset0 + hint.delta[1] + isec->getVA(); + Ldr literalLdr = ldr3; + literalLdr.offset += rel1->referentVA - rel3VA; + if (isLiteralLdrEligible(literalLdr)) { + writeNop(buf + hint.offset0); + writeNop(buf + hint.offset0 + hint.delta[0]); + writeLiteralLdr(buf + hint.offset0 + hint.delta[1], literalLdr); + return; + } + + // Load the target address into a register and load from there indirectly. + // adr x1, _foo + // nop + // ldr x2, [x1, #off] + int64_t adrOffset = rel1->referentVA - rel1->rel.offset - isec->getVA(); + if (isValidAdrOffset(adrOffset)) { + writeAdr(buf + hint.offset0, ldr3.baseRegister, adrOffset); + writeNop(buf + hint.offset0 + hint.delta[0]); + return; + } + + // Move the target's page offset into the ldr's immediate offset. 
+ // adrp x0, _foo@PAGE + // nop + // ldr x2, [x0, _foo@PAGEOFF + #off] + Ldr immediateLdr = ldr3; + immediateLdr.baseRegister = adrp.destRegister; + immediateLdr.offset += add2.addend; + if (isImmediateLdrEligible(immediateLdr)) { + writeNop(buf + hint.offset0 + hint.delta[0]); + writeImmediateLdr(buf + hint.offset0 + hint.delta[1], immediateLdr); + return; + } + } else if (parseLdr(ins2, ldr2)) { + // adrp x1, _foo@GOTPAGE + // ldr x2, [x1, _foo@GOTPAGEOFF] + // ldr x3, [x2, #off] + if (ldr2.baseRegister != adrp.destRegister) + return; + if (ldr3.baseRegister != ldr2.destRegister) + return; + // Loads from the GOT must be pointer sized. + if (ldr2.p2Size != 3 || ldr2.isFloat) + return; + + // Load the GOT entry's address directly. + // nop + // ldr x2, _foo@GOTPAGE + _foo@GOTPAGEOFF + // ldr x3, [x2, #off] + Ldr literalLdr = ldr2; + literalLdr.offset = rel1->referentVA - rel2->rel.offset - isec->getVA(); + if (isLiteralLdrEligible(literalLdr)) { + writeNop(buf + hint.offset0); + writeLiteralLdr(buf + hint.offset0 + hint.delta[0], literalLdr); + } + } } void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec, @@ -443,7 +587,11 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec, ctx1.applyAdrpLdr(hint); break; case LOH_ARM64_ADRP_ADD_LDR: + // TODO: Implement this + break; case LOH_ARM64_ADRP_LDR_GOT_LDR: + ctx1.applyAdrpLdrGotLdr(hint); + break; case LOH_ARM64_ADRP_ADD_STR: case LOH_ARM64_ADRP_LDR_GOT_STR: // TODO: Implement these @@ -452,7 +600,7 @@ void ARM64::applyOptimizationHints(uint8_t *buf, const ConcatInputSection *isec, ctx1.applyAdrpAdd(hint); break; case LOH_ARM64_ADRP_LDR_GOT: - // TODO: Implement this as well + ctx1.applyAdrpLdrGot(hint); break; } } diff --git a/lld/MachO/Arch/ARM64Common.cpp b/lld/MachO/Arch/ARM64Common.cpp index f55258ce8ec9..27fdf4ba14d9 100644 --- a/lld/MachO/Arch/ARM64Common.cpp +++ b/lld/MachO/Arch/ARM64Common.cpp @@ -109,3 +109,21 @@ void ARM64Common::relaxGotLoad(uint8_t 
*loc, uint8_t type) const { instruction = ((instruction & 0x001fffff) | 0x91000000); write32le(loc, instruction); } + +void ARM64Common::handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + assert(r.type == ARM64_RELOC_BRANCH26); + + if (config->outputType == MH_OBJECT) + return; + + if (sym->getName().startswith("___dtrace_probe")) { + // change call site to a NOP + write32le(loc, 0xD503201F); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change call site to 'MOVZ X0,0' + write32le(loc, 0xD2800000); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } +} diff --git a/lld/MachO/Arch/ARM64Common.h b/lld/MachO/Arch/ARM64Common.h index 54f94ee76c06..1bd85066b35a 100644 --- a/lld/MachO/Arch/ARM64Common.h +++ b/lld/MachO/Arch/ARM64Common.h @@ -29,6 +29,9 @@ struct ARM64Common : TargetInfo { void relaxGotLoad(uint8_t *loc, uint8_t type) const override; uint64_t getPageSize() const override { return 16 * 1024; } + + void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const override; }; inline uint64_t bitField(uint64_t value, int right, int width, int left) { diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp index d675356b9ffb..d2efa5bb3451 100644 --- a/lld/MachO/Arch/X86_64.cpp +++ b/lld/MachO/Arch/X86_64.cpp @@ -39,6 +39,9 @@ struct X86_64 : TargetInfo { void relaxGotLoad(uint8_t *loc, uint8_t type) const override; const RelocAttrs &getRelocAttrs(uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } + + void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const override; }; } // namespace @@ -199,3 +202,23 @@ TargetInfo *macho::createX86_64TargetInfo() { static X86_64 t; return &t; } + +void X86_64::handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + assert(r.type == X86_64_RELOC_BRANCH); + + if (config->outputType == MH_OBJECT) + return; + + if 
(sym->getName().startswith("___dtrace_probe")) { + // change call site to a NOP + loc[-1] = 0x90; + write32le(loc, 0x00401F0F); + } else if (sym->getName().startswith("___dtrace_isenabled")) { + // change call site to a clear eax + loc[-1] = 0x33; + write32le(loc, 0x909090C0); + } else { + error("Unrecognized dtrace symbol prefix: " + toString(*sym)); + } +} diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index b6c6abb44c65..ccf71b6535ea 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -131,9 +131,6 @@ struct Configuration { bool omitDebugInfo = false; bool warnDylibInstallName = false; bool ignoreOptimizationHints = false; - // Temporary config flag that will be removed once we have fully implemented - // support for __eh_frame. - bool parseEhFrames = false; uint32_t headerPad; uint32_t dylibCompatibilityVersion = 0; uint32_t dylibCurrentVersion = 0; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 708facd180ba..abfe381f41e0 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1305,7 +1305,6 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, config->callGraphProfileSort = args.hasFlag( OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true); config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order); - config->parseEhFrames = static_cast<bool>(getenv("LLD_IN_TEST")); // FIXME: Add a commandline flag for this too. 
config->zeroModTime = getenv("ZERO_AR_DATE"); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index c53874133a78..fda6900edabe 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -347,7 +347,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) { section.subsections.push_back({0, isec}); } else if (auto recordSize = getRecordSize(segname, name)) { splitRecords(*recordSize); - } else if (config->parseEhFrames && name == section_names::ehFrame && + } else if (name == section_names::ehFrame && segname == segment_names::text) { splitEhFrames(data, *sections.back()); } else if (segname == segment_names::llvm) { @@ -1117,7 +1117,7 @@ template <class LP> void ObjFile::parse() { } if (compactUnwindSection) registerCompactUnwind(*compactUnwindSection); - if (config->parseEhFrames && ehFrameSection) + if (ehFrameSection) registerEhFrames(*ehFrameSection); } @@ -1687,7 +1687,6 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella, umbrella = this; this->umbrella = umbrella; - auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart()); // Initialize installName. @@ -1722,39 +1721,53 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella, // Initialize symbols. exportingFile = isImplicitlyLinked(installName) ? this : this->umbrella; - if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) { - auto *c = reinterpret_cast<const dyld_info_command *>(cmd); - struct TrieEntry { - StringRef name; - uint64_t flags; - }; - std::vector<TrieEntry> entries; - // Find all the $ld$* symbols to process first. - parseTrie(buf + c->export_off, c->export_size, - [&](const Twine &name, uint64_t flags) { - StringRef savedName = saver().save(name); - if (handleLDSymbol(savedName)) - return; - entries.push_back({savedName, flags}); - }); - - // Process the "normal" symbols. 
- for (TrieEntry &entry : entries) { - if (exportingFile->hiddenSymbols.contains( - CachedHashStringRef(entry.name))) - continue; + const auto *dyldInfo = findCommand<dyld_info_command>(hdr, LC_DYLD_INFO_ONLY); + const auto *exportsTrie = + findCommand<linkedit_data_command>(hdr, LC_DYLD_EXPORTS_TRIE); + if (dyldInfo && exportsTrie) { + // It's unclear what should happen in this case. Maybe we should only error + // out if the two load commands refer to different data? + error("dylib " + toString(this) + + " has both LC_DYLD_INFO_ONLY and LC_DYLD_EXPORTS_TRIE"); + return; + } else if (dyldInfo) { + parseExportedSymbols(dyldInfo->export_off, dyldInfo->export_size); + } else if (exportsTrie) { + parseExportedSymbols(exportsTrie->dataoff, exportsTrie->datasize); + } else { + error("No LC_DYLD_INFO_ONLY or LC_DYLD_EXPORTS_TRIE found in " + + toString(this)); + return; + } +} - bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; - bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; +void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) { + struct TrieEntry { + StringRef name; + uint64_t flags; + }; - symbols.push_back( - symtab->addDylib(entry.name, exportingFile, isWeakDef, isTlv)); - } + auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); + std::vector<TrieEntry> entries; + // Find all the $ld$* symbols to process first. + parseTrie(buf + offset, size, [&](const Twine &name, uint64_t flags) { + StringRef savedName = saver().save(name); + if (handleLDSymbol(savedName)) + return; + entries.push_back({savedName, flags}); + }); - } else { - error("LC_DYLD_INFO_ONLY not found in " + toString(this)); - return; + // Process the "normal" symbols. 
+ for (TrieEntry &entry : entries) { + if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(entry.name))) + continue; + + bool isWeakDef = entry.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + bool isTlv = entry.flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; + + symbols.push_back( + symtab->addDylib(entry.name, exportingFile, isWeakDef, isTlv)); } } diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 524418b91ee1..efddc1c46782 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -250,6 +250,7 @@ private: void handleLDInstallNameSymbol(StringRef name, StringRef originalName); void handleLDHideSymbol(StringRef name, StringRef originalName); void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const; + void parseExportedSymbols(uint32_t offset, uint32_t size); llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols; }; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 25eb878736d9..df312525df61 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -201,6 +201,12 @@ void ConcatInputSection::writeTo(uint8_t *buf) { if (target->hasAttr(r.type, RelocAttrBits::LOAD) && !referentSym->isInGot()) target->relaxGotLoad(loc, r.type); + // For dtrace symbols, do not handle them as normal undefined symbols + if (referentSym->getName().startswith("___dtrace_")) { + // Change dtrace call site to pre-defined instructions + target->handleDtraceReloc(referentSym, r, loc); + continue; + } referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; if (isThreadLocalVariables(getFlags())) { diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index d309f66c119f..7bda1d13069f 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -332,6 +332,10 @@ static bool recoverFromUndefinedSymbol(const Undefined &sym) { return true; } + // Leave dtrace symbols, since we will handle them when we do the relocation + if (name.startswith("___dtrace_")) + return true; + // Handle 
-U. if (config->explicitDynamicLookups.count(sym.getName())) { symtab->addDynamicLookup(sym.getName()); diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h index 09ff3c5639ea..597502275dee 100644 --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -79,6 +79,15 @@ public: bool usesThunks() const { return thunkSize > 0; } + // For now, handleDtraceReloc only implements -no_dtrace_dof, and ensures + // that the linking would not fail even when there are user-provided dtrace + // symbols. However, unlike ld64, lld currently does not emit __dof sections. + virtual void handleDtraceReloc(const Symbol *sym, const Reloc &r, + uint8_t *loc) const { + llvm_unreachable("Unsupported architecture for dtrace symbols"); + } + + virtual void applyOptimizationHints(uint8_t *buf, const ConcatInputSection *, llvm::ArrayRef<uint64_t>) const {}; diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 2c4a469578de..8c3425a17459 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -506,7 +506,7 @@ void UnwindInfoSectionImpl::finalize() { secondLevelPages.emplace_back(); SecondLevelPage &page = secondLevelPages.back(); page.entryIndex = i; - uintptr_t functionAddressMax = + uint64_t functionAddressMax = cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; size_t n = commonEncodings.size(); size_t wordsRemaining = diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 527ab4fead38..936d800cabc3 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -29,6 +29,8 @@ ELF Improvements * ``-z pack-relative-relocs`` is now available to support ``DT_RELR`` for glibc 2.36+. (`D120701 <https://reviews.llvm.org/D120701>`_) * ``--no-fortran-common`` (pre 12.0.0 behavior) is now the default. +* ``FORCE_LLD_DIAGNOSTICS_CRASH`` environment variable is now available to force LLD to crash. 
+ (`D128195 <https://reviews.llvm.org/D128195>`_) Breaking changes ---------------- @@ -63,7 +65,11 @@ MinGW Improvements MachO Improvements ------------------ -* Item 1. +* We now support proper relocation and pruning of EH frames. **Note:** this + comes at some performance overhead on x86_64 builds, and we recommend adding + the ``-femit-compact-unwind=no-compact-unwind`` compile flag to avoid it. + (`D129540 <https://reviews.llvm.org/D129540>`_, + `D122258 <https://reviews.llvm.org/D122258>`_) WebAssembly Improvements ------------------------ diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index d6c39fa7cce4..3ac59877affb 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -214,6 +214,12 @@ int main(int argc, const char **argv) { InitLLVM x(argc, argv); sys::Process::UseANSIEscapeCodes(true); + if (::getenv("FORCE_LLD_DIAGNOSTICS_CRASH")) { + llvm::errs() + << "crashing due to environment variable FORCE_LLD_DIAGNOSTICS_CRASH\n"; + LLVM_BUILTIN_TRAP; + } + // Not running in lit tests, just take the shortest codepath with global // exception handling and no memory cleanup on exit. if (!inTestVerbosity()) |