diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
commit | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch) | |
tree | 4adf86a776049cbf7f69a1929c4babcbbef925eb /lld/ELF | |
parent | 7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff) | |
download | src-706b4fc47bbc608932d3b491ae19a3b9cde9497b.tar.gz src-706b4fc47bbc608932d3b491ae19a3b9cde9497b.zip |
Vendor import of llvm-project master e26a78e70, the last commit beforevendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085
the llvmorg-11-init tag, from which release/10.x was branched.
Notes
Notes:
svn path=/vendor/llvm-project/master/; revision=356843
svn path=/vendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085/; revision=356844; tag=vendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085
Diffstat (limited to 'lld/ELF')
41 files changed, 1198 insertions, 571 deletions
diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 7fb3e02e7ee4..398320af71e3 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -61,7 +61,7 @@ static bool isADRP(uint32_t instr) { // Instructions appear in order of appearance starting from table in // C4.1.3 Loads and Stores. -// All loads and stores have 1 (at bit postion 27), (0 at bit position 25). +// All loads and stores have 1 (at bit position 27), (0 at bit position 25). // | op0 x op1 (2) | 1 op2 0 op3 (2) | x | op4 (5) | xxxx | op5 (2) | x (10) | static bool isLoadStoreClass(uint32_t instr) { return (instr & 0x0a000000) == 0x08000000; @@ -381,6 +381,10 @@ public: uint64_t getLDSTAddr() const; + static bool classof(const SectionBase *d) { + return d->kind() == InputSectionBase::Synthetic && d->name == ".text.patch"; + } + // The Section we are patching. const InputSection *patchee; // The offset of the instruction in the patchee section we are patching. @@ -619,7 +623,7 @@ AArch64Err843419Patcher::patchInputSectionDescription( // // PostConditions: // Returns true if at least one patch was added. The addresses of the -// Ouptut and Input Sections may have been changed. +// Output and Input Sections may have been changed. // Returns false if no patches were required and no changes were made. bool AArch64Err843419Patcher::createFixes() { if (!initialized) diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp index 493fafc6a0b2..91cd2b5a2f5f 100644 --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -82,6 +82,10 @@ public: // Get the virtual address of the branch instruction at patcheeOffset. uint64_t getBranchAddr() const; + static bool classof(const SectionBase *d) { + return d->kind() == InputSectionBase::Synthetic && d->name ==".text.patch"; + } + // The Section we are patching. const InputSection *patchee; // The offset of the instruction in the Patchee section we are patching. 
@@ -97,7 +101,7 @@ public: // Return true if the half-word, when taken as the first of a pair of halfwords // is the first half of a 32-bit instruction. -// Reference from ARM Architecure Reference Manual ARMv7-A and ARMv7-R edition +// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition // section A6.3: 32-bit Thumb instruction encoding // | HW1 | HW2 | // | 1 1 1 | op1 (2) | op2 (7) | x (4) |op| x (15) | @@ -108,7 +112,7 @@ static bool is32bitInstruction(uint16_t hw) { return (hw & 0xe000) == 0xe000 && (hw & 0x1800) != 0x0000; } -// Reference from ARM Architecure Reference Manual ARMv7-A and ARMv7-R edition +// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition // section A6.3.4 Branches and miscellaneous control. // | HW1 | HW2 | // | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) | diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 5cf07029fa1d..df41a12f7454 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -37,10 +37,11 @@ public: RelType getDynRel(RelType type) const override; void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; bool usesOnlyLowPageBits(RelType type) const override; @@ -63,8 +64,9 @@ AArch64::AArch64() { symbolicRel = R_AARCH64_ABS64; tlsDescRel = R_AARCH64_TLSDESC; tlsGotRel = R_AARCH64_TLS_TPREL64; - pltEntrySize = 16; pltHeaderSize = 32; + 
pltEntrySize = 16; + ipltEntrySize = 16; defaultMaxPageSize = 65536; // Align to the 2 MiB page size (known as a superpage or huge page). @@ -212,9 +214,8 @@ void AArch64::writePltHeader(uint8_t *buf) const { relocateOne(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); } -void AArch64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void AArch64::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { const uint8_t inst[] = { 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n])) 0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.plt.got[n]))] @@ -223,6 +224,7 @@ void AArch64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, }; memcpy(buf, inst, sizeof(inst)); + uint64_t gotPltEntryAddr = sym.getGotPltVA(); relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); relocateOne(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); @@ -230,13 +232,18 @@ void AArch64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, } bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t a) const { + // If s is an undefined weak symbol and does not have a PLT entry then it + // will be resolved as a branch to the next instruction. + if (s.isUndefWeak() && !s.isInPlt()) + return false; // ELF for the ARM 64-bit architecture, section Call and Jump relocations // only permits range extension thunks for R_AARCH64_CALL26 and // R_AARCH64_JUMP26 relocation types. if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) return false; - uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA(); + uint64_t dst = expr == R_PLT_PC ? 
s.getPltVA() : s.getVA(a); return !inBranchRange(type, branchAddr, dst); } @@ -567,8 +574,8 @@ class AArch64BtiPac final : public AArch64 { public: AArch64BtiPac(); void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; private: bool btiHeader; // bti instruction needed in PLT Header @@ -589,8 +596,10 @@ AArch64BtiPac::AArch64BtiPac() { btiEntry = btiHeader && !config->shared; pacEntry = (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_PAC); - if (btiEntry || pacEntry) + if (btiEntry || pacEntry) { pltEntrySize = 24; + ipltEntrySize = 24; + } } void AArch64BtiPac::writePltHeader(uint8_t *buf) const { @@ -627,9 +636,8 @@ void AArch64BtiPac::writePltHeader(uint8_t *buf) const { memcpy(buf + sizeof(pltData), nopData, sizeof(nopData)); } -void AArch64BtiPac::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { // The PLT entry is of the form: // [btiData] addrInst (pacBr | stdBr) [nopData] const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c @@ -654,6 +662,7 @@ void AArch64BtiPac::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, pltEntryAddr += sizeof(btiData); } + uint64_t gotPltEntryAddr = sym.getGotPltVA(); memcpy(buf, addrInst, sizeof(addrInst)); relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, getAArch64Page(gotPltEntryAddr) - diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 41baea496d36..de1023346aa5 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -34,12 +34,13 @@ public: void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void 
writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; void addPltSymbols(InputSection &isec, uint64_t off) const override; void addPltHeaderSymbols(InputSection &isd) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; @@ -58,8 +59,9 @@ ARM::ARM() { tlsModuleIndexRel = R_ARM_TLS_DTPMOD32; tlsOffsetRel = R_ARM_TLS_DTPOFF32; gotBaseSymInGotPlt = false; - pltEntrySize = 16; pltHeaderSize = 32; + pltEntrySize = 16; + ipltEntrySize = 16; trapInstr = {0xd4, 0xd4, 0xd4, 0xd4}; needsThunks = true; } @@ -138,7 +140,7 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, // given address. It can be used to implement a special linker mode which // rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and // not ARMv4 output, we can just ignore it. - return R_HINT; + return R_NONE; default: return R_ABS; } @@ -215,8 +217,7 @@ void ARM::addPltHeaderSymbols(InputSection &isec) const { // Long form PLT entries that do not have any restrictions on the displacement // of the .plt from the .plt.got. static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) { + uint64_t pltEntryAddr) { const uint8_t pltData[] = { 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, L2 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc @@ -230,9 +231,8 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr, // The default PLT entries require the .plt.got to be within 128 Mb of the // .plt in the positive direction. 
-void ARM::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void ARM::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { // The PLT entry is similar to the example given in Appendix A of ELF for // the Arm Architecture. Instead of using the Group Relocations to find the // optimal rotation for the 8-bit immediate used in the add instructions we @@ -244,10 +244,10 @@ void ARM::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.plt.got) - L1 - 8 }; - uint64_t offset = gotPltEntryAddr - pltEntryAddr - 8; + uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; if (!llvm::isUInt<27>(offset)) { // We cannot encode the Offset, use the long form. - writePltLong(buf, gotPltEntryAddr, pltEntryAddr, index, relOff); + writePltLong(buf, sym.getGotPltVA(), pltEntryAddr); return; } write32le(buf + 0, pltData[0] | ((offset >> 20) & 0xff)); @@ -262,7 +262,7 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. 
if (s.isUndefWeak() && !s.isInPlt()) diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 67264a2272dd..106bc9bab5bd 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -33,8 +33,8 @@ public: RelType getDynRel(RelType type) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; }; } // namespace @@ -54,6 +54,7 @@ Hexagon::Hexagon() { // Hexagon Linux uses 64K pages by default. defaultMaxPageSize = 0x10000; noneRel = R_HEX_NONE; + tlsGotRel = R_HEX_TPREL_32; } uint32_t Hexagon::calcEFlags() const { @@ -115,6 +116,11 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_B22_PCREL_X: case R_HEX_B32_PCREL_X: return R_PLT_PC; + case R_HEX_IE_32_6_X: + case R_HEX_IE_16_X: + case R_HEX_IE_HI16: + case R_HEX_IE_LO16: + return R_GOT; case R_HEX_GOTREL_11_X: case R_HEX_GOTREL_16_X: case R_HEX_GOTREL_32_6_X: @@ -125,6 +131,20 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_GOT_16_X: case R_HEX_GOT_32_6_X: return R_GOTPLT; + case R_HEX_IE_GOT_11_X: + case R_HEX_IE_GOT_16_X: + case R_HEX_IE_GOT_32_6_X: + case R_HEX_IE_GOT_HI16: + case R_HEX_IE_GOT_LO16: + config->hasStaticTlsModel = true; + return R_GOTPLT; + case R_HEX_TPREL_11_X: + case R_HEX_TPREL_16: + case R_HEX_TPREL_16_X: + case R_HEX_TPREL_32_6_X: + case R_HEX_TPREL_HI16: + case R_HEX_TPREL_LO16: + return R_TLS; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -220,18 +240,26 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x00203fe0, val & 0x3f)); break; case R_HEX_11_X: + case R_HEX_IE_GOT_11_X: case R_HEX_GOT_11_X: 
case R_HEX_GOTREL_11_X: + case R_HEX_TPREL_11_X: or32le(loc, applyMask(findMaskR11(read32le(loc)), val & 0x3f)); break; case R_HEX_12_X: or32le(loc, applyMask(0x000007e0, val)); break; case R_HEX_16_X: // These relocs only have 6 effective bits. + case R_HEX_IE_16_X: + case R_HEX_IE_GOT_16_X: case R_HEX_GOT_16_X: case R_HEX_GOTREL_16_X: + case R_HEX_TPREL_16_X: or32le(loc, applyMask(findMaskR16(read32le(loc)), val & 0x3f)); break; + case R_HEX_TPREL_16: + or32le(loc, applyMask(findMaskR16(read32le(loc)), val & 0xffff)); + break; case R_HEX_32: case R_HEX_32_PCREL: or32le(loc, val); @@ -239,6 +267,9 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_HEX_32_6_X: case R_HEX_GOT_32_6_X: case R_HEX_GOTREL_32_6_X: + case R_HEX_IE_GOT_32_6_X: + case R_HEX_IE_32_6_X: + case R_HEX_TPREL_32_6_X: or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_B9_PCREL: @@ -272,10 +303,16 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_HEX_GOTREL_HI16: case R_HEX_HI16: + case R_HEX_IE_GOT_HI16: + case R_HEX_IE_HI16: + case R_HEX_TPREL_HI16: or32le(loc, applyMask(0x00c03fff, val >> 16)); break; case R_HEX_GOTREL_LO16: case R_HEX_LO16: + case R_HEX_IE_GOT_LO16: + case R_HEX_IE_LO16: + case R_HEX_TPREL_LO16: or32le(loc, applyMask(0x00c03fff, val)); break; default: @@ -302,9 +339,8 @@ void Hexagon::writePltHeader(uint8_t *buf) const { relocateOne(buf + 4, R_HEX_6_PCREL_X, off); } -void Hexagon::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void Hexagon::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { const uint8_t inst[] = { 0x00, 0x40, 0x00, 0x00, // { immext (#0) 0x0e, 0xc0, 0x49, 0x6a, // r14 = add (pc, ##GOTn@PCREL) } @@ -313,6 +349,7 @@ void Hexagon::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, }; memcpy(buf, inst, sizeof(inst)); + uint64_t gotPltEntryAddr = sym.getGotPltVA(); relocateOne(buf, 
R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr); relocateOne(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr); } diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index d8fa306a6205..ed6f4ca24130 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -32,10 +32,11 @@ public: RelType getDynRel(RelType type) const override; void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; bool usesOnlyLowPageBits(RelType type) const override; }; @@ -83,6 +84,17 @@ RelExpr MIPS<ELFT>::getRelExpr(RelType type, const Symbol &s, switch (type) { case R_MIPS_JALR: + // Older versions of clang would erroneously emit this relocation not only + // against functions (loaded from the GOT) but also against data symbols + // (e.g. a table of function pointers). When we encounter this, ignore the + // relocation and emit a warning instead. + if (!s.isFunc() && s.type != STT_NOTYPE) { + warn(getErrorLocation(loc) + + "found R_MIPS_JALR relocation against non-function symbol " + + toString(s) + ". This is invalid and most likely a compiler bug."); + return R_NONE; + } + // If the target symbol is not preemptible and is not microMIPS, // it might be possible to replace jalr/jr instruction by bal/b. // It depends on the target symbol's offset. 
@@ -306,9 +318,9 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *buf) const { } template <class ELFT> -void MIPS<ELFT>::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void MIPS<ELFT>::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + uint64_t gotPltEntryAddr = sym.getGotPltVA(); if (isMicroMips()) { // Overwrite trap instructions written by Writer::writeTrapInstr. memset(buf, 0, pltEntrySize); @@ -345,7 +357,8 @@ void MIPS<ELFT>::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, template <class ELFT> bool MIPS<ELFT>::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t /*a*/) const { // Any MIPS PIC code function is invoked with its address in register $t9. // So if we have a branch instruction from non-PIC code to the PIC one // we cannot make the jump directly and need to create a small stubs diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index c4eecb9a29c2..3c0b0c290b58 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -10,6 +10,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Endian.h" @@ -31,13 +32,16 @@ public: void writePltHeader(uint8_t *buf) const override { llvm_unreachable("should call writePPC32GlinkSection() instead"); } - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override { + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override { llvm_unreachable("should call writePPC32GlinkSection() instead"); } + void writeIplt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; void writeGotPlt(uint8_t *buf, const Symbol &s) const override; bool needsThunk(RelExpr expr, RelType relocType, 
const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; @@ -143,6 +147,7 @@ PPC::PPC() { gotPltHeaderEntriesNum = 0; pltHeaderSize = 64; // size of PLTresolve in .glink pltEntrySize = 4; + ipltEntrySize = 16; needsThunks = true; @@ -156,6 +161,13 @@ PPC::PPC() { write32(trapInstr.data(), 0x7fe00008); } +void PPC::writeIplt(uint8_t *buf, const Symbol &sym, + uint64_t /*pltEntryAddr*/) const { + // In -pie or -shared mode, assume r30 points to .got2+0x8000, and use a + // .got2.plt_pic32. thunk. + writePPC32PltCallStub(buf, sym.getGotPltVA(), sym.file, 0x8000); +} + void PPC::writeGotHeader(uint8_t *buf) const { // _GLOBAL_OFFSET_TABLE_[0] = _DYNAMIC // glibc stores _dl_runtime_resolve in _GLOBAL_OFFSET_TABLE_[1], @@ -169,7 +181,7 @@ void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 905903fa4d66..da77a4ddaddf 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -9,6 +9,7 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "Thunks.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Endian.h" @@ -200,12 +201,15 @@ public: const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + 
void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + void writeIplt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const override; + uint64_t branchAddr, const Symbol &s, + int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, @@ -295,11 +299,12 @@ PPC64::PPC64() { relativeRel = R_PPC64_RELATIVE; iRelativeRel = R_PPC64_IRELATIVE; symbolicRel = R_PPC64_ADDR64; + pltHeaderSize = 60; pltEntrySize = 4; + ipltEntrySize = 16; // PPC64PltCallStub::size gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 1; gotPltHeaderEntriesNum = 2; - pltHeaderSize = 60; needsThunks = true; tlsModuleIndexRel = R_PPC64_DTPMOD64; @@ -667,14 +672,18 @@ void PPC64::writePltHeader(uint8_t *buf) const { write64(buf + 52, gotPltOffset); } -void PPC64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { - int32_t offset = pltHeaderSize + index * pltEntrySize; +void PPC64::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t /*pltEntryAddr*/) const { + int32_t offset = pltHeaderSize + sym.pltIndex * pltEntrySize; // bl __glink_PLTresolve write32(buf, 0x48000000 | ((-offset) & 0x03FFFFFc)); } +void PPC64::writeIplt(uint8_t *buf, const Symbol &sym, + uint64_t /*pltEntryAddr*/) const { + writePPC64LoadAndBranch(buf, sym.getGotPltVA() - getPPC64TocBase()); +} + static std::pair<RelType, uint64_t> toAddr16Rel(RelType type, uint64_t val) { // Relocations relative to the toc-base need to be adjusted by the Toc offset. 
uint64_t tocBiasedVal = val - ppc64TocOffset; @@ -827,7 +836,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC64_ADDR16_LO: case R_PPC64_REL16_LO: case R_PPC64_TPREL16_LO: - // When the high-adjusted part of a toc relocation evalutes to 0, it is + // When the high-adjusted part of a toc relocation evaluates to 0, it is // changed into a nop. The lo part then needs to be updated to use the // toc-pointer register r2, as the base register. if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) { @@ -849,7 +858,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { uint16_t mask = isDQFormInstruction(insn) ? 0xf : 0x3; checkAlignment(loc, lo(val), mask + 1, originalType); if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) { - // When the high-adjusted part of a toc relocation evalutes to 0, it is + // When the high-adjusted part of a toc relocation evaluates to 0, it is // changed into a nop. The lo part then needs to be updated to use the toc // pointer register r2, as the base register. if (isInstructionUpdateForm(insn)) @@ -898,7 +907,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, int64_t a) const { if (type != R_PPC64_REL14 && type != R_PPC64_REL24) return false; @@ -915,7 +924,7 @@ bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, // a range-extending thunk. // See the comment in getRelocTargetVA() about R_PPC64_CALL. 
return !inBranchRange(type, branchAddr, - s.getVA() + + s.getVA(a) + getPPC64GlobalEntryToLocalEntryOffset(s.stOther)); } @@ -990,7 +999,7 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // The prologue for a split-stack function is expected to look roughly // like this: // .Lglobal_entry_point: -// # TOC pointer initalization. +// # TOC pointer initialization. // ... // .Llocal_entry_point: // # load the __private_ss member of the threads tcbhead. diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index e7c0e36e0327..42db8e08162d 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -27,8 +28,8 @@ public: void writeGotHeader(uint8_t *buf) const override; void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; RelType getDynRel(RelType type) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; @@ -95,8 +96,9 @@ RISCV::RISCV() { // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map gotPltHeaderEntriesNum = 2; - pltEntrySize = 16; pltHeaderSize = 32; + pltEntrySize = 16; + ipltEntrySize = 16; } static uint32_t getEFlags(InputFile *f) { @@ -106,7 +108,10 @@ static uint32_t getEFlags(InputFile *f) { } uint32_t RISCV::calcEFlags() const { - assert(!objectFiles.empty()); + // If there are only binary input files (from -b binary), use a + // value of 0 for the ELF header flags. 
+ if (objectFiles.empty()) + return 0; uint32_t target = getEFlags(objectFiles.front()); @@ -162,14 +167,13 @@ void RISCV::writePltHeader(uint8_t *buf) const { write32le(buf + 28, itype(JALR, 0, X_T3, 0)); } -void RISCV::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void RISCV::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { // 1: auipc t3, %pcrel_hi(f@.got.plt) // l[wd] t3, %pcrel_lo(1b)(t3) // jalr t1, t3 // nop - uint32_t offset = gotPltEntryAddr - pltEntryAddr; + uint32_t offset = sym.getGotPltVA() - pltEntryAddr; write32le(buf + 0, utype(AUIPC, X_T3, hi20(offset))); write32le(buf + 4, itype(config->is64 ? LD : LW, X_T3, X_T3, lo12(offset))); write32le(buf + 8, itype(JALR, X_T1, X_T3, 0)); @@ -184,6 +188,15 @@ RelType RISCV::getDynRel(RelType type) const { RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, const uint8_t *loc) const { switch (type) { + case R_RISCV_NONE: + return R_NONE; + case R_RISCV_32: + case R_RISCV_64: + case R_RISCV_HI20: + case R_RISCV_LO12_I: + case R_RISCV_LO12_S: + case R_RISCV_RVC_LUI: + return R_ABS; case R_RISCV_ADD8: case R_RISCV_ADD16: case R_RISCV_ADD32: @@ -225,9 +238,11 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_RELAX: case R_RISCV_ALIGN: case R_RISCV_TPREL_ADD: - return R_HINT; + return R_NONE; default: - return R_ABS; + error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + + ") against symbol " + toString(s)); + return R_NONE; } } @@ -419,21 +434,9 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_ALIGN: case R_RISCV_RELAX: return; // Ignored (for now) - case R_RISCV_NONE: - return; // Do nothing - - // These are handled by the dynamic linker - case R_RISCV_RELATIVE: - case R_RISCV_COPY: - case R_RISCV_JUMP_SLOT: - // GP-relative relocations are only produced after relaxation, which - // we don't support for now - case R_RISCV_GPREL_I: - case 
R_RISCV_GPREL_S: + default: - error(getErrorLocation(loc) + - "unimplemented relocation: " + toString(type)); - return; + llvm_unreachable("unknown relocation"); } } diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp index a0afdff08a63..08ef52099de9 100644 --- a/lld/ELF/Arch/SPARCV9.cpp +++ b/lld/ELF/Arch/SPARCV9.cpp @@ -26,8 +26,8 @@ public: SPARCV9(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void writePlt(uint8_t *buf, uint64_t gotEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; }; } // namespace @@ -124,9 +124,8 @@ void SPARCV9::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -void SPARCV9::writePlt(uint8_t *buf, uint64_t gotEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void SPARCV9::writePlt(uint8_t *buf, const Symbol & /*sym*/, + uint64_t pltEntryAddr) const { const uint8_t pltData[] = { 0x03, 0x00, 0x00, 0x00, // sethi (. 
- .PLT0), %g1 0x30, 0x68, 0x00, 0x00, // ba,a %xcc, .PLT1 @@ -139,7 +138,7 @@ void SPARCV9::writePlt(uint8_t *buf, uint64_t gotEntryAddr, }; memcpy(buf, pltData, sizeof(pltData)); - uint64_t off = pltHeaderSize + pltEntrySize * index; + uint64_t off = pltEntryAddr - in.plt->getVA(); relocateOne(buf, R_SPARC_22, off); relocateOne(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); } diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp index b27a6e302e78..b4daedc0f5dc 100644 --- a/lld/ELF/Arch/X86.cpp +++ b/lld/ELF/Arch/X86.cpp @@ -33,8 +33,8 @@ public: void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, @@ -57,8 +57,9 @@ X86::X86() { tlsGotRel = R_386_TLS_TPOFF; tlsModuleIndexRel = R_386_TLS_DTPMOD32; tlsOffsetRel = R_386_TLS_DTPOFF32; - pltEntrySize = 16; pltHeaderSize = 16; + pltEntrySize = 16; + ipltEntrySize = 16; trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 // Align to the non-PAE large page size (known as a superpage or huge page). @@ -115,7 +116,7 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s, // address at runtime (which means code is position-independent but // compilers need to emit extra code for each GOT access.) This decision // is made at compile-time. In the latter case, compilers emit code to - // load an GOT address to a register, which is usually %ebx. + // load a GOT address to a register, which is usually %ebx. // // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or // foo@GOT(%ebx). 
@@ -213,9 +214,9 @@ void X86::writePltHeader(uint8_t *buf) const { write32le(buf + 8, gotPlt + 8); } -void X86::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void X86::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + unsigned relOff = in.relaPlt->entsize * sym.pltIndex; if (config->isPic) { const uint8_t inst[] = { 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) @@ -223,7 +224,7 @@ void X86::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC }; memcpy(buf, inst, sizeof(inst)); - write32le(buf + 2, gotPltEntryAddr - in.gotPlt->getVA()); + write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA()); } else { const uint8_t inst[] = { 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT @@ -231,11 +232,11 @@ void X86::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC }; memcpy(buf, inst, sizeof(inst)); - write32le(buf + 2, gotPltEntryAddr); + write32le(buf + 2, sym.getGotPltVA()); } write32le(buf + 7, relOff); - write32le(buf + 12, -pltHeaderSize - pltEntrySize * index - 16); + write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16); } int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { @@ -409,14 +410,79 @@ void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { memcpy(loc - 2, inst, sizeof(inst)); } +// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT +// entries containing endbr32 instructions. A PLT entry will be split into two +// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). 
+namespace { +class IntelIBT : public X86 { +public: + IntelIBT(); + void writeGotPlt(uint8_t *buf, const Symbol &s) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + void writeIBTPlt(uint8_t *buf, size_t numEntries) const override; + + static const unsigned IBTPltHeaderSize = 16; +}; +} // namespace + +IntelIBT::IntelIBT() { pltHeaderSize = 0; } + +void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const { + uint64_t va = + in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize; + write32le(buf, va); +} + +void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t /*pltEntryAddr*/) const { + if (config->isPic) { + const uint8_t inst[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx) + 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop + }; + memcpy(buf, inst, sizeof(inst)); + write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA()); + return; + } + + const uint8_t inst[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT + 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop + }; + memcpy(buf, inst, sizeof(inst)); + write32le(buf + 6, sym.getGotPltVA()); +} + +void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const { + writePltHeader(buf); + buf += IBTPltHeaderSize; + + const uint8_t inst[] = { + 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 + 0x68, 0, 0, 0, 0, // pushl $reloc_offset + 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC + 0x66, 0x90, // nop + }; + + for (size_t i = 0; i < numEntries; ++i) { + memcpy(buf, inst, sizeof(inst)); + write32le(buf + 5, i * sizeof(object::ELF32LE::Rel)); + write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30); + buf += sizeof(inst); + } +} + namespace { class RetpolinePic : public X86 { public: RetpolinePic(); void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - 
int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; }; class RetpolineNoPic : public X86 { @@ -424,14 +490,15 @@ public: RetpolineNoPic(); void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; }; } // namespace RetpolinePic::RetpolinePic() { pltHeaderSize = 48; pltEntrySize = 32; + ipltEntrySize = 32; } void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const { @@ -459,9 +526,9 @@ void RetpolinePic::writePltHeader(uint8_t *buf) const { memcpy(buf, insn, sizeof(insn)); } -void RetpolinePic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + unsigned relOff = in.relaPlt->entsize * sym.pltIndex; const uint8_t insn[] = { 0x50, // pushl %eax 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax @@ -474,8 +541,8 @@ void RetpolinePic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, memcpy(buf, insn, sizeof(insn)); uint32_t ebx = in.gotPlt->getVA(); - unsigned off = pltHeaderSize + pltEntrySize * index; - write32le(buf + 3, gotPltEntryAddr - ebx); + unsigned off = pltEntryAddr - in.plt->getVA(); + write32le(buf + 3, sym.getGotPltVA() - ebx); write32le(buf + 8, -off - 12 + 32); write32le(buf + 13, -off - 17 + 18); write32le(buf + 18, relOff); @@ -485,6 +552,7 @@ void RetpolinePic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, RetpolineNoPic::RetpolineNoPic() { pltHeaderSize = 48; pltEntrySize = 32; + ipltEntrySize = 32; } void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const { @@ -517,9 +585,9 @@ void 
RetpolineNoPic::writePltHeader(uint8_t *buf) const { write32le(buf + 8, gotPlt + 8); } -void RetpolineNoPic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + unsigned relOff = in.relaPlt->entsize * sym.pltIndex; const uint8_t insn[] = { 0x50, // 0: pushl %eax 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax @@ -532,8 +600,8 @@ void RetpolineNoPic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, }; memcpy(buf, insn, sizeof(insn)); - unsigned off = pltHeaderSize + pltEntrySize * index; - write32le(buf + 2, gotPltEntryAddr); + unsigned off = pltEntryAddr - in.plt->getVA(); + write32le(buf + 2, sym.getGotPltVA()); write32le(buf + 7, -off - 11 + 32); write32le(buf + 12, -off - 16 + 17); write32le(buf + 17, relOff); @@ -550,6 +618,11 @@ TargetInfo *getX86TargetInfo() { return &t; } + if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) { + static IntelIBT t; + return &t; + } + static X86 t; return &t; } diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index bb8d92fc61b9..74b72eb91293 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -33,8 +33,8 @@ public: void writeGotPltHeader(uint8_t *buf) const override; void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, @@ -61,8 +61,9 @@ X86_64::X86_64() { tlsGotRel = R_X86_64_TPOFF64; tlsModuleIndexRel = R_X86_64_DTPMOD64; tlsOffsetRel = R_X86_64_DTPOFF64; - pltEntrySize = 16; pltHeaderSize = 16; + pltEntrySize = 16; + 
ipltEntrySize = 16; trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 // Align to the large page size (known as a superpage or huge page). @@ -150,14 +151,13 @@ void X86_64::writePltHeader(uint8_t *buf) const { }; memcpy(buf, pltData, sizeof(pltData)); uint64_t gotPlt = in.gotPlt->getVA(); - uint64_t plt = in.plt->getVA(); + uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA(); write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8 write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16 } -void X86_64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void X86_64::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { const uint8_t inst[] = { 0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip) 0x68, 0, 0, 0, 0, // pushq <relocation index> @@ -165,9 +165,9 @@ void X86_64::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, }; memcpy(buf, inst, sizeof(inst)); - write32le(buf + 2, gotPltEntryAddr - pltEntryAddr - 6); - write32le(buf + 7, index); - write32le(buf + 12, -pltHeaderSize - pltEntrySize * index - 16); + write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6); + write32le(buf + 7, sym.pltIndex); + write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16); } RelType X86_64::getDynRel(RelType type) const { @@ -568,6 +568,60 @@ bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, return false; } +// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT +// entries containing endbr64 instructions. A PLT entry will be split into two +// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). 
+namespace { +class IntelIBT : public X86_64 { +public: + IntelIBT(); + void writeGotPlt(uint8_t *buf, const Symbol &s) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + void writeIBTPlt(uint8_t *buf, size_t numEntries) const override; + + static const unsigned IBTPltHeaderSize = 16; +}; +} // namespace + +IntelIBT::IntelIBT() { pltHeaderSize = 0; } + +void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const { + uint64_t va = + in.ibtPlt->getVA() + IBTPltHeaderSize + s.pltIndex * pltEntrySize; + write64le(buf, va); +} + +void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + const uint8_t Inst[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip) + 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop + }; + memcpy(buf, Inst, sizeof(Inst)); + write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10); +} + +void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const { + writePltHeader(buf); + buf += IBTPltHeaderSize; + + const uint8_t inst[] = { + 0xf3, 0x0f, 0x1e, 0xfa, // endbr64 + 0x68, 0, 0, 0, 0, // pushq <relocation index> + 0xe9, 0, 0, 0, 0, // jmpq plt[0] + 0x66, 0x90, // nop + }; + + for (size_t i = 0; i < numEntries; ++i) { + memcpy(buf, inst, sizeof(inst)); + write32le(buf + 5, i); + write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30); + buf += sizeof(inst); + } +} + // These nonstandard PLT entries are to migtigate Spectre v2 security // vulnerability. In order to mitigate Spectre v2, we want to avoid indirect // branch instructions such as `jmp *GOTPLT(%rip)`. 
So, in the following PLT @@ -583,8 +637,8 @@ public: Retpoline(); void writeGotPlt(uint8_t *buf, const Symbol &s) const override; void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; }; class RetpolineZNow : public X86_64 { @@ -592,14 +646,15 @@ public: RetpolineZNow(); void writeGotPlt(uint8_t *buf, const Symbol &s) const override {} void writePltHeader(uint8_t *buf) const override; - void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, - int32_t index, unsigned relOff) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; }; } // namespace Retpoline::Retpoline() { pltHeaderSize = 48; pltEntrySize = 32; + ipltEntrySize = 32; } void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const { @@ -628,9 +683,8 @@ void Retpoline::writePltHeader(uint8_t *buf) const { write32le(buf + 9, gotPlt - plt - 13 + 16); } -void Retpoline::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void Retpoline::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { const uint8_t insn[] = { 0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0: mov foo@GOTPLT(%rip), %r11 0xe8, 0, 0, 0, 0, // 7: callq plt+0x20 @@ -641,18 +695,19 @@ void Retpoline::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, }; memcpy(buf, insn, sizeof(insn)); - uint64_t off = pltHeaderSize + pltEntrySize * index; + uint64_t off = pltEntryAddr - in.plt->getVA(); - write32le(buf + 3, gotPltEntryAddr - pltEntryAddr - 7); + write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7); write32le(buf + 8, -off - 12 + 32); write32le(buf + 13, -off - 17 + 18); - write32le(buf + 18, index); + write32le(buf + 18, sym.pltIndex); write32le(buf + 23, -off - 27); } 
RetpolineZNow::RetpolineZNow() { pltHeaderSize = 32; pltEntrySize = 16; + ipltEntrySize = 16; } void RetpolineZNow::writePltHeader(uint8_t *buf) const { @@ -671,9 +726,8 @@ void RetpolineZNow::writePltHeader(uint8_t *buf) const { memcpy(buf, insn, sizeof(insn)); } -void RetpolineZNow::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const { +void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { const uint8_t insn[] = { 0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // mov foo@GOTPLT(%rip), %r11 0xe9, 0, 0, 0, 0, // jmp plt+0 @@ -681,8 +735,8 @@ void RetpolineZNow::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, }; memcpy(buf, insn, sizeof(insn)); - write32le(buf + 3, gotPltEntryAddr - pltEntryAddr - 7); - write32le(buf + 8, -pltHeaderSize - pltEntrySize * index - 12); + write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7); + write32le(buf + 8, in.plt->getVA() - pltEntryAddr - 12); } static TargetInfo *getTargetInfo() { @@ -695,6 +749,11 @@ static TargetInfo *getTargetInfo() { return &t; } + if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) { + static IntelIBT t; + return &t; + } + static X86_64 t; return &t; } diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index 6f8ef8954af3..6dad7c965f1a 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -16,9 +16,9 @@ /// /// Definitions: /// * Cluster -/// * An ordered list of input sections which are layed out as a unit. At the +/// * An ordered list of input sections which are laid out as a unit. At the /// beginning of the algorithm each input section has its own cluster and -/// the weight of the cluster is the sum of the weight of all incomming +/// the weight of the cluster is the sum of the weight of all incoming /// edges. /// * Call-Chain Clustering (C³) Heuristic /// * Defines when and how clusters are combined. Pick the highest weighted @@ -26,7 +26,7 @@ /// penalize it too much. 
/// * Density /// * The weight of the cluster divided by the size of the cluster. This is a -/// proxy for the ammount of execution time spent per byte of the cluster. +/// proxy for the amount of execution time spent per byte of the cluster. /// /// It does so given a call graph profile by the following: /// * Build a weighted call graph from the call graph profile @@ -86,7 +86,7 @@ private: std::vector<const InputSectionBase *> sections; }; -// Maximum ammount the combined cluster density can be worse than the original +// Maximum amount the combined cluster density can be worse than the original // cluster to consider merging. constexpr int MAX_DENSITY_DEGRADATION = 8; diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 0c68a8485fa2..06ba88a83dd4 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -64,6 +64,9 @@ enum class ARMVFPArgKind { Default, Base, VFP, ToolChain }; // For -z noseparate-code, -z separate-code and -z separate-loadable-segments. enum class SeparateSegmentKind { None, Code, Loadable }; +// For -z *stack +enum class GnuStackKind { None, Exec, NoExec }; + struct SymbolVersion { llvm::StringRef name; bool isExternCpp; @@ -151,7 +154,6 @@ struct Configuration { bool fixCortexA8; bool forceBTI; bool formatBinary = false; - bool requireCET; bool gcSections; bool gdbIndex; bool gnuHash = false; @@ -165,6 +167,7 @@ struct Configuration { bool ltoNewPassManager; bool mergeArmExidx; bool mipsN32Abi = false; + bool mmapOutputFile; bool nmagic; bool noinhibitExec; bool nostdlib; @@ -198,7 +201,7 @@ struct Configuration { bool writeAddends; bool zCombreloc; bool zCopyreloc; - bool zExecstack; + bool zForceIbt; bool zGlobal; bool zHazardplt; bool zIfuncNoplt; @@ -212,10 +215,12 @@ struct Configuration { bool zOrigin; bool zRelro; bool zRodynamic; + bool zShstk; bool zText; bool zRetpolineplt; bool zWxneeded; DiscardPolicy discard; + GnuStackKind zGnustack; ICFLevel icf; OrphanHandlingPolicy orphanHandling; SortSectionPolicy sortSection; @@ -239,7 
+244,7 @@ struct Configuration { int32_t splitStackAdjustSize; // The following config options do not directly correspond to any - // particualr command line options. + // particular command line options. // True if we need to pass through relocations in input files to the // output file. Usually false because we consume relocations. diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 96257a4c7624..23da749d3078 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -75,14 +75,17 @@ LinkerDriver *driver; static void setConfigs(opt::InputArgList &args); static void readConfigs(opt::InputArgList &args); -bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &error) { +bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS, + raw_ostream &stderrOS) { + lld::stdoutOS = &stdoutOS; + lld::stderrOS = &stderrOS; + errorHandler().logName = args::getFilenameWithoutExe(args[0]); errorHandler().errorLimitExceededMsg = "too many errors emitted, stopping now (use " "-error-limit=0 to see all errors)"; - errorHandler().errorOS = &error; errorHandler().exitEarly = canExitEarly; - enableColors(error.has_colors()); + stderrOS.enable_colors(stderrOS.has_colors()); inputSections.clear(); outputSections.clear(); @@ -162,10 +165,7 @@ std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers( std::vector<std::pair<MemoryBufferRef, uint64_t>> v; Error err = Error::success(); bool addToTar = file->isThin() && tar; - for (const ErrorOr<Archive::Child> &cOrErr : file->children(err)) { - Archive::Child c = - CHECK(cOrErr, mb.getBufferIdentifier() + - ": could not get the child of the archive"); + for (const Archive::Child &c : file->children(err)) { MemoryBufferRef mbref = CHECK(c.getMemoryBufferRef(), mb.getBufferIdentifier() + @@ -346,14 +346,14 @@ static void checkOptions() { error("-execute-only and -no-rosegment cannot be used together"); } - if (config->zRetpolineplt && config->requireCET) - error("--require-cet may 
not be used with -z retpolineplt"); + if (config->zRetpolineplt && config->zForceIbt) + error("-z force-ibt may not be used with -z retpolineplt"); if (config->emachine != EM_AARCH64) { if (config->pacPlt) - error("--pac-plt only supported on AArch64"); + error("-z pac-plt only supported on AArch64"); if (config->forceBTI) - error("--force-bti only supported on AArch64"); + error("-z force-bti only supported on AArch64"); } } @@ -394,18 +394,33 @@ static SeparateSegmentKind getZSeparate(opt::InputArgList &args) { return SeparateSegmentKind::None; } +static GnuStackKind getZGnuStack(opt::InputArgList &args) { + for (auto *arg : args.filtered_reverse(OPT_z)) { + if (StringRef("execstack") == arg->getValue()) + return GnuStackKind::Exec; + if (StringRef("noexecstack") == arg->getValue()) + return GnuStackKind::NoExec; + if (StringRef("nognustack") == arg->getValue()) + return GnuStackKind::None; + } + + return GnuStackKind::NoExec; +} + static bool isKnownZFlag(StringRef s) { return s == "combreloc" || s == "copyreloc" || s == "defs" || - s == "execstack" || s == "global" || s == "hazardplt" || - s == "ifunc-noplt" || s == "initfirst" || s == "interpose" || + s == "execstack" || s == "force-bti" || s == "force-ibt" || + s == "global" || s == "hazardplt" || s == "ifunc-noplt" || + s == "initfirst" || s == "interpose" || s == "keep-text-section-prefix" || s == "lazy" || s == "muldefs" || s == "separate-code" || s == "separate-loadable-segments" || s == "nocombreloc" || s == "nocopyreloc" || s == "nodefaultlib" || s == "nodelete" || s == "nodlopen" || s == "noexecstack" || - s == "nokeep-text-section-prefix" || s == "norelro" || - s == "noseparate-code" || s == "notext" || s == "now" || - s == "origin" || s == "relro" || s == "retpolineplt" || - s == "rodynamic" || s == "text" || s == "undefs" || s == "wxneeded" || + s == "nognustack" || s == "nokeep-text-section-prefix" || + s == "norelro" || s == "noseparate-code" || s == "notext" || + s == "now" || s == "origin" || s 
== "pac-plt" || s == "relro" || + s == "retpolineplt" || s == "rodynamic" || s == "shstk" || + s == "text" || s == "undefs" || s == "wxneeded" || s.startswith("common-page-size=") || s.startswith("max-page-size=") || s.startswith("stack-size="); } @@ -861,8 +876,7 @@ static void readConfigs(opt::InputArgList &args) { config->fini = args.getLastArgValue(OPT_fini, "_fini"); config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419); config->fixCortexA8 = args.hasArg(OPT_fix_cortex_a8); - config->forceBTI = args.hasArg(OPT_force_bti); - config->requireCET = args.hasArg(OPT_require_cet); + config->forceBTI = hasZOption(args, "force-bti"); config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false); config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true); config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false); @@ -886,6 +900,8 @@ static void readConfigs(opt::InputArgList &args) { config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); config->mergeArmExidx = args.hasFlag(OPT_merge_exidx_entries, OPT_no_merge_exidx_entries, true); + config->mmapOutputFile = + args.hasFlag(OPT_mmap_output_file, OPT_no_mmap_output_file, true); config->nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false); config->noinhibitExec = args.hasArg(OPT_noinhibit_exec); config->nostdlib = args.hasArg(OPT_nostdlib); @@ -898,7 +914,7 @@ static void readConfigs(opt::InputArgList &args) { config->optimize = args::getInteger(args, OPT_O, 1); config->orphanHandling = getOrphanHandling(args); config->outputFile = args.getLastArgValue(OPT_o); - config->pacPlt = args.hasArg(OPT_pac_plt); + config->pacPlt = hasZOption(args, "pac-plt"); config->pie = args.hasFlag(OPT_pie, OPT_no_pie, false); config->printIcfSections = args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false); @@ -949,8 +965,9 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_warn_symbol_ordering, 
OPT_no_warn_symbol_ordering, true); config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true); config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true); - config->zExecstack = getZFlag(args, "execstack", "noexecstack", false); + config->zForceIbt = hasZOption(args, "force-ibt"); config->zGlobal = hasZOption(args, "global"); + config->zGnustack = getZGnuStack(args); config->zHazardplt = hasZOption(args, "hazardplt"); config->zIfuncNoplt = hasZOption(args, "ifunc-noplt"); config->zInitfirst = hasZOption(args, "initfirst"); @@ -966,6 +983,7 @@ static void readConfigs(opt::InputArgList &args) { config->zRetpolineplt = hasZOption(args, "retpolineplt"); config->zRodynamic = hasZOption(args, "rodynamic"); config->zSeparate = getZSeparate(args); + config->zShstk = hasZOption(args, "shstk"); config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0); config->zText = getZFlag(args, "text", "notext", true); config->zWxneeded = hasZOption(args, "wxneeded"); @@ -992,6 +1010,14 @@ static void readConfigs(opt::InputArgList &args) { if (config->splitStackAdjustSize < 0) error("--split-stack-adjust-size: size must be >= 0"); + // The text segment is traditionally the first segment, whose address equals + // the base address. However, lld places the R PT_LOAD first. -Ttext-segment + // is an old-fashioned option that does not play well with lld's layout. + // Suggest --image-base as a likely alternative. + if (args.hasArg(OPT_Ttext_segment)) + error("-Ttext-segment is not supported. Use --image-base if you " + "intend to set the base address"); + // Parse ELF{32,64}{LE,BE} and CPU type. 
if (auto *arg = args.getLastArg(OPT_m)) { StringRef s = arg->getValue(); @@ -1349,7 +1375,7 @@ static void excludeLibs(opt::InputArgList &args) { if (!file->archiveName.empty()) if (all || libs.count(path::filename(file->archiveName))) for (Symbol *sym : file->getSymbols()) - if (!sym->isLocal() && sym->file == file) + if (!sym->isUndefined() && !sym->isLocal() && sym->file == file) sym->versionId = VER_NDX_LOCAL; }; @@ -1370,7 +1396,7 @@ static void handleUndefined(Symbol *sym) { sym->fetch(); } -// As an extention to GNU linkers, lld supports a variant of `-u` +// As an extension to GNU linkers, lld supports a variant of `-u` // which accepts wildcard patterns. All symbols that match a given // pattern are handled as if they were given by `-u`. static void handleUndefinedGlob(StringRef arg) { @@ -1381,13 +1407,13 @@ static void handleUndefinedGlob(StringRef arg) { } std::vector<Symbol *> syms; - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { // Calling Sym->fetch() from here is not safe because it may // add new symbols to the symbol table, invalidating the // current iterator. So we just keep a note. if (pat->match(sym->getName())) syms.push_back(sym); - }); + } for (Symbol *sym : syms) handleUndefined(sym); @@ -1413,10 +1439,10 @@ static void handleLibcall(StringRef name) { // result, the passes after the symbol resolution won't see any // symbols of type CommonSymbol. 
static void replaceCommonSymbols() { - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { auto *s = dyn_cast<CommonSymbol>(sym); if (!s) - return; + continue; auto *bss = make<BssSection>("COMMON", s->size, s->alignment); bss->file = s->file; @@ -1424,7 +1450,7 @@ static void replaceCommonSymbols() { inputSections.push_back(bss); s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, /*value=*/0, s->size, bss}); - }); + } } // If all references to a DSO happen to be weak, the DSO is not added @@ -1432,15 +1458,15 @@ static void replaceCommonSymbols() { // created from the DSO. Otherwise, they become dangling references // that point to a non-existent DSO. static void demoteSharedSymbols() { - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { auto *s = dyn_cast<SharedSymbol>(sym); if (!s || s->getFile().isNeeded) - return; + continue; bool used = s->used; s->replace(Undefined{nullptr, s->getName(), STB_WEAK, s->stOther, s->type}); s->used = used; - }); + } } // The section referred to by `s` is considered address-significant. Set the @@ -1476,10 +1502,9 @@ static void findKeepUniqueSections(opt::InputArgList &args) { // Symbols in the dynsym could be address-significant in other executables // or DSOs, so we conservatively mark them as address-significant. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->includeInDynsym()) markAddrsig(sym); - }); // Visit the address-significance table in each object file and mark each // referenced symbol as address-significant. @@ -1663,12 +1688,8 @@ static void wrapSymbols(ArrayRef<WrappedSymbol> wrapped) { // with CET. We enable the feature only when all object files are compatible // with CET. // -// This function returns the merged feature flags. If 0, we cannot enable CET. // This is also the case with AARCH64's BTI and PAC which use the similar // GNU_PROPERTY_AARCH64_FEATURE_1_AND mechanism. 
-// -// Note that the CET-aware PLT is not implemented yet. We do error -// check only. template <class ELFT> static uint32_t getAndFeatures() { if (config->emachine != EM_386 && config->emachine != EM_X86_64 && config->emachine != EM_AARCH64) @@ -1678,10 +1699,14 @@ template <class ELFT> static uint32_t getAndFeatures() { for (InputFile *f : objectFiles) { uint32_t features = cast<ObjFile<ELFT>>(f)->andFeatures; if (config->forceBTI && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) { - warn(toString(f) + ": --force-bti: file does not have BTI property"); + warn(toString(f) + ": -z force-bti: file does not have BTI property"); features |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI; - } else if (!features && config->requireCET) - error(toString(f) + ": --require-cet: file is not compatible with CET"); + } else if (config->zForceIbt && + !(features & GNU_PROPERTY_X86_FEATURE_1_IBT)) { + warn(toString(f) + ": -z force-ibt: file does not have " + "GNU_PROPERTY_X86_FEATURE_1_IBT property"); + features |= GNU_PROPERTY_X86_FEATURE_1_IBT; + } ret &= features; } @@ -1689,6 +1714,9 @@ template <class ELFT> static uint32_t getAndFeatures() { // this does not require support in the object for correctness. if (config->pacPlt) ret |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC; + // Force enable Shadow Stack. + if (config->zShstk) + ret |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; return ret; } @@ -1765,6 +1793,12 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { for (StringRef pat : args::getStrings(args, OPT_undefined_glob)) handleUndefinedGlob(pat); + // Mark -init and -fini symbols so that the LTO doesn't eliminate them. + if (Symbol *sym = symtab->find(config->init)) + sym->isUsedInRegularObj = true; + if (Symbol *sym = symtab->find(config->fini)) + sym->isUsedInRegularObj = true; + // If any of our inputs are bitcode files, the LTO code generator may create // references to certain library functions that might not be explicit in the // bitcode file's symbol table. 
If any of those library functions are defined diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 43987cd5d4d4..9fcb36e81676 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -59,15 +59,15 @@ static void handleColorDiagnostics(opt::InputArgList &args) { if (!arg) return; if (arg->getOption().getID() == OPT_color_diagnostics) { - enableColors(true); + lld::errs().enable_colors(true); } else if (arg->getOption().getID() == OPT_no_color_diagnostics) { - enableColors(false); + lld::errs().enable_colors(false); } else { StringRef s = arg->getValue(); if (s == "always") - enableColors(true); + lld::errs().enable_colors(true); else if (s == "never") - enableColors(false); + lld::errs().enable_colors(false); else if (s != "auto") error("unknown option: --color-diagnostics=" + s); } @@ -145,16 +145,16 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> argv) { void printHelp() { ELFOptTable().PrintHelp( - outs(), (config->progName + " [options] file...").str().c_str(), "lld", - false /*ShowHidden*/, true /*ShowAllAliases*/); - outs() << "\n"; + lld::outs(), (config->progName + " [options] file...").str().c_str(), + "lld", false /*ShowHidden*/, true /*ShowAllAliases*/); + lld::outs() << "\n"; // Scripts generated by Libtool versions up to at least 2.4.6 (the most // recent version as of March 2017) expect /: supported targets:.* elf/ // in a message for the -help option. If it doesn't match, the scripts // assume that the linker doesn't support very basic features such as // shared libraries. Therefore, we need to print out at least "elf". - outs() << config->progName << ": supported targets: elf\n"; + lld::outs() << config->progName << ": supported targets: elf\n"; } static std::string rewritePath(StringRef s) { diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index dce76f79c9b3..8992b6564a8a 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -42,7 +42,7 @@ // relocation targets. 
Relocation targets are considered equivalent if // their targets are in the same equivalence class. Sections with // different relocation targets are put into different equivalence -// clases. +// classes. // // 3. If we split an equivalence class in step 2, two relocations // previously target the same equivalence class may now target @@ -259,6 +259,13 @@ bool ICF<ELFT>::constantEq(const InputSection *secA, ArrayRef<RelTy> ra, if (!da || !db || da->scriptDefined || db->scriptDefined) return false; + // When comparing a pair of relocations, if they refer to different symbols, + // and either symbol is preemptible, the containing sections should be + // considered different. This is because even if the sections are identical + // in this DSO, they may not be after preemption. + if (da->isPreemptible || db->isPreemptible) + return false; + // Relocations referring to absolute symbols are constant-equal if their // values are equal. if (!da->section && !db->section && da->value + addA == db->value + addB) @@ -445,6 +452,12 @@ static void print(const Twine &s) { // The main function of ICF. template <class ELFT> void ICF<ELFT>::run() { + // Compute isPreemptible early. We may add more symbols later, so this loop + // cannot be merged with the later computeIsPreemptible() pass which is used + // by scanRelocations(). + for (Symbol *sym : symtab->symbols()) + sym->isPreemptible = computeIsPreemptible(*sym); + // Collect sections to merge. 
for (InputSectionBase *sec : inputSections) { auto *s = cast<InputSection>(sec); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index fdf935a30856..43978cd66c61 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -13,6 +13,7 @@ #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" +#include "lld/Common/DWARF.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/ADT/STLExtras.h" @@ -295,7 +296,7 @@ Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, } } - // Use fake address calcuated by adding section file offset and offset in + // Use fake address calculated by adding section file offset and offset in // section. See comments for ObjectInfo class. return dwarf->getDILineInfo(s->getOffsetInFile() + offset, sectionIndex); } @@ -496,6 +497,44 @@ static void addDependentLibrary(StringRef specifier, const InputFile *f) { specifier); } +// Record the membership of a section group so that in the garbage collection +// pass, section group members are kept or discarded as a unit. +template <class ELFT> +static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, + ArrayRef<typename ELFT::Word> entries) { + bool hasAlloc = false; + for (uint32_t index : entries.slice(1)) { + if (index >= sections.size()) + return; + if (InputSectionBase *s = sections[index]) + if (s != &InputSection::discarded && s->flags & SHF_ALLOC) + hasAlloc = true; + } + + // If any member has the SHF_ALLOC flag, the whole group is subject to garbage + // collection. See the comment in markLive(). This rule retains .debug_types + // and .rela.debug_types. + if (!hasAlloc) + return; + + // Connect the members in a circular doubly-linked list via + // nextInSectionGroup. 
+ InputSectionBase *head; + InputSectionBase *prev = nullptr; + for (uint32_t index : entries.slice(1)) { + InputSectionBase *s = sections[index]; + if (!s || s == &InputSection::discarded) + continue; + if (prev) + prev->nextInSectionGroup = s; + else + head = s; + prev = s; + } + if (prev) + prev->nextInSectionGroup = head; +} + template <class ELFT> void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { const ELFFile<ELFT> &obj = this->getObj(); @@ -506,6 +545,8 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { this->sectionStringTable = CHECK(obj.getSectionStringTable(objSections), this); + std::vector<ArrayRef<Elf_Word>> selectedGroups; + for (size_t i = 0, e = objSections.size(); i < e; ++i) { if (this->sections[i] == &InputSection::discarded) continue; @@ -563,6 +604,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { if (isNew) { if (config->relocatable) this->sections[i] = createInputSection(sec); + selectedGroups.push_back(entries); continue; } @@ -587,6 +629,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { } } + // This block handles SHF_LINK_ORDER. for (size_t i = 0, e = objSections.size(); i < e; ++i) { if (this->sections[i] == &InputSection::discarded) continue; @@ -609,6 +652,9 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) { " with SHF_LINK_ORDER should not refer a non-regular section: " + toString(linkSec)); } + + for (ArrayRef<Elf_Word> entries : selectedGroups) + handleSectionGroup<ELFT>(this->sections, entries); } // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD @@ -741,7 +787,7 @@ static uint32_t readAndFeatures(ObjFile<ELFT> *obj, ArrayRef<uint8_t> data) { if (type == featureAndType) { // We found a FEATURE_1_AND field. There may be more than one of these - // in a .note.gnu.propery section, for a relocatable object we + // in a .note.gnu.property section, for a relocatable object we // accumulate the bits set. 
featuresSet |= read32le(desc.data() + 8); } @@ -838,6 +884,16 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) { if (!target) return nullptr; + // ELF spec allows mergeable sections with relocations, but they are + // rare, and it is in practice hard to merge such sections by contents, + // because applying relocations at end of linking changes section + // contents. So, we simply handle such sections as non-mergeable ones. + // Degrading like this is acceptable because section merging is optional. + if (auto *ms = dyn_cast<MergeInputSection>(target)) { + target = toRegularSection(ms); + this->sections[sec.sh_info] = target; + } + // This section contains relocation information. // If -r is given, we do not interpret or apply relocation // but just copy relocation sections to output. @@ -856,16 +912,6 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) { fatal(toString(this) + ": multiple relocation sections to one section are not supported"); - // ELF spec allows mergeable sections with relocations, but they are - // rare, and it is in practice hard to merge such sections by contents, - // because applying relocations at end of linking changes section - // contents. So, we simply handle such sections as non-mergeable ones. - // Degrading like this is acceptable because section merging is optional. 
- if (auto *ms = dyn_cast<MergeInputSection>(target)) { - target = toRegularSection(ms); - this->sections[sec.sh_info] = target; - } - if (sec.sh_type == SHT_RELA) { ArrayRef<Elf_Rela> rels = CHECK(getObj().relas(&sec), this); target->firstRelocation = rels.begin(); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index cde6bc617764..a310ba551bd4 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -10,7 +10,6 @@ #define LLD_ELF_INPUT_FILES_H #include "Config.h" -#include "lld/Common/DWARF.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/LLVM.h" #include "lld/Common/Reproduce.h" @@ -25,6 +24,7 @@ #include <map> namespace llvm { +struct DILineInfo; class TarWriter; namespace lto { class InputFile; @@ -32,6 +32,7 @@ class InputFile; } // namespace llvm namespace lld { +class DWARFCache; // Returns "<internal>", "foo.a(bar.o)" or "baz.o". std::string toString(const elf::InputFile *f); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 0c93d2e10959..aab272f53a73 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -72,7 +72,7 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags, areRelocsRela = false; // The ELF spec states that a value of 0 means the section has - // no alignment constraits. + // no alignment constraints. uint32_t v = std::max<uint32_t>(alignment, 1); if (!isPowerOf2_64(v)) fatal(toString(this) + ": sh_addralign is not a power of 2"); @@ -421,7 +421,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { p->r_addend = getAddend<ELFT>(rel); // Output section VA is zero for -r, so r_offset is an offset within the - // section, but for --emit-relocs it is an virtual address. + // section, but for --emit-relocs it is a virtual address. 
p->r_offset = sec->getVA(rel.r_offset); p->setSymbolAndType(in.symTab->getSymbolIndex(&sym), type, config->isMips64EL); @@ -469,7 +469,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { target->getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { // Some MIPS relocations depend on "gp" value. By default, // this value has 0x7ff0 offset from a .got section. But - // relocatable files produced by a complier or a linker + // relocatable files produced by a compiler or a linker // might redefine this default value and we must use it // for a calculation of the relocation result. When we // generate EXE or DSO it's trivial. Generating a relocatable @@ -636,6 +636,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)); // Variant 2. + case EM_HEXAGON: case EM_386: case EM_X86_64: return s.getVA(0) - tls->p_memsz - @@ -757,7 +758,7 @@ static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, case R_PPC32_PLTREL: // R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30 // stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for - // target VA compuation. + // target VA computation. return sym.getPltVA() - p; case R_PPC64_CALL: { uint64_t symVA = sym.getVA(a); @@ -825,7 +826,7 @@ static uint64_t getRelocTargetVA(const InputFile *file, RelType type, int64_t a, // Such sections are never mapped to memory at runtime. Debug sections are // an example. Relocations in non-alloc sections are much easier to // handle than in allocated sections because it will never need complex -// treatement such as GOT or PLT (because at runtime no one refers them). +// treatment such as GOT or PLT (because at runtime no one refers them). // So, we handle relocations for non-alloc sections directly in this // function as a performance optimization. 
template <class ELFT, class RelTy> @@ -971,8 +972,16 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { // Patch a nop (0x60000000) to a ld. if (rel.sym->needsTocRestore) { - if (bufLoc + 8 > bufEnd || read32(bufLoc + 4) != 0x60000000) { - error(getErrorLocation(bufLoc) + "call lacks nop, can't restore toc"); + // gcc/gfortran 5.4, 6.3 and earlier versions do not add nop for + // recursive calls even if the function is preemptible. This is not + // wrong in the common case where the function is not preempted at + // runtime. Just ignore. + if ((bufLoc + 8 > bufEnd || read32(bufLoc + 4) != 0x60000000) && + rel.sym->file != file) { + // Use substr(6) to remove the "__plt_" prefix. + errorOrWarn(getErrorLocation(bufLoc) + "call to " + + lld::toString(*rel.sym).substr(6) + + " lacks nop, can't restore toc"); break; } write32(bufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index d7c953262a41..3c42af7db7b4 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -142,6 +142,10 @@ public: // cases this points one level up. SectionBase *parent = nullptr; + // The next member in the section group if this section is in a group. This is + // used by --gc-sections. + InputSectionBase *nextInSectionGroup = nullptr; + template <class ELFT> ArrayRef<typename ELFT::Rel> rels() const { assert(!areRelocsRela); return llvm::makeArrayRef( diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 6da409568c8b..2148ac500291 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -93,6 +93,9 @@ static lto::Config createConfig() { c.MAttrs = getMAttrs(); c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.PTO.LoopVectorization = c.OptLevel > 1; + c.PTO.SLPVectorization = c.OptLevel > 1; + // Set up a custom pipeline if we've been asked to. 
c.OptPipeline = config->ltoNewPmPasses; c.AAPipeline = config->ltoAAPipeline; @@ -145,12 +148,12 @@ BitcodeCompiler::BitcodeCompiler() { config->ltoPartitions); // Initialize usedStartStop. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { StringRef s = sym->getName(); for (StringRef prefix : {"__start_", "__stop_"}) if (s.startswith(prefix)) usedStartStop.insert(s.substr(prefix.size())); - }); + } } BitcodeCompiler::~BitcodeCompiler() = default; diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index cebbd89168be..57e0e1e8acbf 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -442,7 +442,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) { } void LinkerScript::discard(InputSectionBase *s) { - if (s == in.shStrTab || s == mainPart->relaDyn || s == mainPart->relrDyn) + if (s == in.shStrTab || s == mainPart->relrDyn) error("discarding " + s->name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since @@ -954,8 +954,6 @@ void LinkerScript::adjustSectionsBeforeSorting() { if (isEmpty && isDiscardable(*sec)) { sec->markDead(); cmd = nullptr; - } else if (!sec->isLive()) { - sec->markLive(); } } diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 621b8baeaae6..25a14e08dade 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -116,7 +116,7 @@ struct SymbolAssignment : BaseCommand { unsigned size; }; -// Linker scripts allow additional constraints to be put on ouput sections. +// Linker scripts allow additional constraints to be put on output sections. // If an output section is marked as ONLY_IF_RO, the section is created // only if its input sections are read-only. Likewise, an output section // with ONLY_IF_RW is created if all input sections are RW. 
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index 4d76e22f37f5..e5f5c4f4ff23 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -213,7 +213,7 @@ void writeMapFile() { } static void print(StringRef a, StringRef b) { - outs() << left_justify(a, 49) << " " << b << "\n"; + lld::outs() << left_justify(a, 49) << " " << b << "\n"; } // Output a cross reference table to stdout. This is for --cref. @@ -244,7 +244,7 @@ void writeCrossReferenceTable() { } // Print out a header. - outs() << "Cross Reference Table\n\n"; + lld::outs() << "Cross Reference Table\n\n"; print("Symbol", "File"); // Print out a table. diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 02ab6d18e537..bb0105c28928 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -165,9 +165,11 @@ static bool isReserved(InputSectionBase *sec) { switch (sec->type) { case SHT_FINI_ARRAY: case SHT_INIT_ARRAY: - case SHT_NOTE: case SHT_PREINIT_ARRAY: return true; + case SHT_NOTE: + // SHT_NOTE sections in a group are subject to garbage collection. + return !sec->nextInSectionGroup; default: StringRef s = sec->name; return s.startswith(".ctors") || s.startswith(".dtors") || @@ -217,10 +219,9 @@ template <class ELFT> void MarkLive<ELFT>::run() { // Preserve externally-visible symbols if the symbols defined by this // file can interrupt other ELF file's symbols at runtime. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->includeInDynsym() && sym->partition == partition) markSymbol(sym); - }); // If this isn't the main partition, that's all that we need to preserve. if (partition != 1) { @@ -283,6 +284,10 @@ template <class ELFT> void MarkLive<ELFT>::mark() { for (InputSectionBase *isec : sec.dependentSections) enqueue(isec, 0); + + // Mark the next group member. 
+ if (sec.nextInSectionGroup) + enqueue(sec.nextInSectionGroup, 0); } } @@ -324,15 +329,14 @@ template <class ELFT> void markLive() { sec->markLive(); // If a DSO defines a symbol referenced in a regular object, it is needed. - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (auto *s = dyn_cast<SharedSymbol>(sym)) if (s->isUsedInRegularObj && !s->isWeak()) s->getFile().isNeeded = true; - }); return; } - // Otheriwse, do mark-sweep GC. + // Otherwise, do mark-sweep GC. // // The -gc-sections option works only for SHF_ALLOC sections // (sections that are memory-mapped at runtime). So we can @@ -353,12 +357,19 @@ template <class ELFT> void markLive() { // or -emit-reloc were given. And they are subject of garbage // collection because, if we remove a text section, we also // remove its relocation section. + // + // Note on nextInSectionGroup: The ELF spec says that group sections are + // included or omitted as a unit. We take the interpretation that: + // + // - Group members (nextInSectionGroup != nullptr) are subject to garbage + // collection. + // - Groups members are retained or discarded as a unit. 
for (InputSectionBase *sec : inputSections) { bool isAlloc = (sec->flags & SHF_ALLOC); bool isLinkOrder = (sec->flags & SHF_LINK_ORDER); bool isRel = (sec->type == SHT_REL || sec->type == SHT_RELA); - if (!isAlloc && !isLinkOrder && !isRel) + if (!isAlloc && !isLinkOrder && !isRel && !sec->nextInSectionGroup) sec->markLive(); } diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c540efb25e90..ea78a3526211 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -59,6 +59,8 @@ defm Tdata: Eq<"Tdata", "Same as --section-start with .data as the sectionname"> defm Ttext: Eq<"Ttext", "Same as --section-start with .text as the sectionname">; +def Ttext_segment: Separate<["-", "--"], "Ttext-segment">; + defm allow_multiple_definition: B<"allow-multiple-definition", "Allow multiple definitions", "Do not allow multiple definitions (default)">; @@ -174,13 +176,6 @@ def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">, def fix_cortex_a8: F<"fix-cortex-a8">, HelpText<"Apply fixes for ARM Cortex-A8 erratum 657417">; -// This option is intentionally hidden from the user as the implementation -// is not complete. 
-def require_cet: F<"require-cet">; - -def force_bti: F<"force-bti">, - HelpText<"Force enable AArch64 BTI in PLT, warn if Input ELF file does not have GNU_PROPERTY_AARCH64_FEATURE_1_BTI property">; - defm format: Eq<"format", "Change the input format of the inputs following this option">, MetaVarName<"[default,elf,binary]">; @@ -232,6 +227,10 @@ defm merge_exidx_entries: B<"merge-exidx-entries", "Enable merging .ARM.exidx entries (default)", "Disable merging .ARM.exidx entries">; +defm mmap_output_file: B<"mmap-output-file", + "Mmap the output file for writing (default)", + "Do not mmap the output file for writing">; + def nmagic: F<"nmagic">, MetaVarName<"<magic>">, HelpText<"Do not page align sections, link against static libraries.">; @@ -275,9 +274,6 @@ defm pack_dyn_relocs: Eq<"pack-dyn-relocs", "Pack dynamic relocations in the given format">, MetaVarName<"[none,android,relr,android+relr]">; -def pac_plt: F<"pac-plt">, - HelpText<"AArch64 only, use pointer authentication in PLT">; - defm use_android_relr_tags: B<"use-android-relr-tags", "Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*", "Use SHT_RELR / DT_RELR* tags (default)">; @@ -298,7 +294,7 @@ defm print_icf_sections: B<"print-icf-sections", "Do not list identical folded sections (default)">; defm print_symbol_order: Eq<"print-symbol-order", - "Print a symbol order specified by --call-graph-ordering-file into the speficied file">; + "Print a symbol order specified by --call-graph-ordering-file into the specified file">; def pop_state: F<"pop-state">, HelpText<"Undo the effect of -push-state">; @@ -420,7 +416,7 @@ def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">, HelpText<"Linker option extensions">; def visual_studio_diagnostics_format : F<"vs-diagnostics">, -HelpText<"Format diagnostics for Visual Studio compatiblity">; +HelpText<"Format diagnostics for Visual Studio compatibility">; // Aliases def: Separate<["-"], "f">, Alias<auxiliary>, HelpText<"Alias for 
--auxiliary">; @@ -458,9 +454,8 @@ def: Flag<["-"], "(">, Alias<start_group>, HelpText<"Alias for --start-group">; def: Flag<["-"], "s">, Alias<strip_all>, HelpText<"Alias for --strip-all">; def: Flag<["-"], "S">, Alias<strip_debug>, HelpText<"Alias for --strip-debug">; def: Flag<["-"], "t">, Alias<trace>, HelpText<"Alias for --trace">; +def: Joined<["-", "--"], "Ttext-segment=">, Alias<Ttext_segment>; def: JoinedOrSeparate<["-"], "y">, Alias<trace_symbol>, HelpText<"Alias for --trace-symbol">; -def: Separate<["-", "--"], "Ttext-segment">, Alias<Ttext>, HelpText<"Alias for --Ttext">; -def: Joined<["-", "--"], "Ttext-segment=">, Alias<Ttext>, HelpText<"Alias for --Ttext">; def: JoinedOrSeparate<["-"], "u">, Alias<undefined>, HelpText<"Alias for --undefined">; def: Flag<["-"], "V">, Alias<version>, HelpText<"Alias for --version">; @@ -478,7 +473,7 @@ def lto_O: J<"lto-O">, MetaVarName<"<opt-level>">, def lto_partitions: J<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; def lto_cs_profile_generate: F<"lto-cs-profile-generate">, - HelpText<"Perform context senstive PGO instrumentation">; + HelpText<"Perform context sensitive PGO instrumentation">; def lto_cs_profile_file: J<"lto-cs-profile-file=">, HelpText<"Context sensitive profile file path">; def lto_obj_path_eq: J<"lto-obj-path=">; @@ -566,7 +561,6 @@ def: F<"no-add-needed">; def: F<"no-copy-dt-needed-entries">; def: F<"no-ctors-in-init-array">; def: F<"no-keep-memory">; -def: F<"no-mmap-output-file">; def: F<"no-pipeline-knowledge">; def: F<"no-warn-mismatch">; def: Flag<["-"], "p">; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index ea7c96eb676a..6142cb0783ce 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SHA1.h" +#include <regex> using namespace llvm; using namespace llvm::dwarf; @@ -88,7 +89,7 @@ static bool canMergeToProgbits(unsigned 
type) { // InputSection post finalizeInputSections(), then you must do the following: // // 1. Find or create an InputSectionDescription to hold InputSection. -// 2. Add the InputSection to the InputSectionDesciption::sections. +// 2. Add the InputSection to the InputSectionDescription::sections. // 3. Call commitSection(isec). void OutputSection::recordSection(InputSectionBase *isec) { partition = isec->partition; @@ -271,7 +272,12 @@ template <class ELFT> void OutputSection::maybeCompress() { // Write section contents to a temporary buffer and compress it. std::vector<uint8_t> buf(size); writeTo<ELFT>(buf.data()); - if (Error e = zlib::compress(toStringRef(buf), compressedData)) + // We chose 1 as the default compression level because it is the fastest. If + // -O2 is given, we use level 6 to compress debug info more by ~15%. We found + // that level 7 to 9 doesn't make much difference (~1% more compression) while + // they take significant amount of time (~2x), so level 6 seems enough. + if (Error e = zlib::compress(toStringRef(buf), compressedData, + config->optimize >= 2 ? 6 : 1)) fatal("compress failed: " + llvm::toString(std::move(e))); // Update section headers. @@ -296,7 +302,7 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { if (type == SHT_NOBITS) return; - // If -compress-debug-section is specified and if this is a debug seciton, + // If -compress-debug-section is specified and if this is a debug section, // we've already compressed section contents. If that's the case, // just write it down. if (!compressedData.empty()) { @@ -384,18 +390,23 @@ void OutputSection::finalize() { flags |= SHF_INFO_LINK; } -// Returns true if S matches /Filename.?\.o$/. 
-static bool isCrtBeginEnd(StringRef s, StringRef filename) { - if (!s.endswith(".o")) - return false; - s = s.drop_back(2); - if (s.endswith(filename)) - return true; - return !s.empty() && s.drop_back().endswith(filename); +// Returns true if S is in one of the many forms the compiler driver may pass +// crtbegin files. +// +// Gcc uses any of crtbegin[<empty>|S|T].o. +// Clang uses Gcc's plus clang_rt.crtbegin[<empty>|S|T][-<arch>|<empty>].o. + +static bool isCrtbegin(StringRef s) { + static std::regex re(R"((clang_rt\.)?crtbegin[ST]?(-.*)?\.o)"); + s = sys::path::filename(s); + return std::regex_match(s.begin(), s.end(), re); } -static bool isCrtbegin(StringRef s) { return isCrtBeginEnd(s, "crtbegin"); } -static bool isCrtend(StringRef s) { return isCrtBeginEnd(s, "crtend"); } +static bool isCrtend(StringRef s) { + static std::regex re(R"((clang_rt\.)?crtend[ST]?(-.*)?\.o)"); + s = sys::path::filename(s); + return std::regex_match(s.begin(), s.end(), re); +} // .ctors and .dtors are sorted by this priority from highest to lowest. // diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index ab3030d91017..4731554e0c0d 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -53,6 +53,7 @@ #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Demangle/Demangle.h" #include "llvm/Support/Endian.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -176,7 +177,9 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, return 1; } - bool canRelax = config->emachine != EM_ARM && config->emachine != EM_RISCV; + bool canRelax = config->emachine != EM_ARM && + config->emachine != EM_HEXAGON && + config->emachine != EM_RISCV; // If we are producing an executable and the symbol is non-preemptable, it // must be defined and the code sequence can be relaxed to use Local-Exec. 
@@ -374,8 +377,8 @@ static bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC, R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_TLSDESC_CALL, - R_TLSDESC_PC, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, - R_TLSIE_HINT>(e)) + R_TLSDESC_PC, R_AARCH64_TLSDESC_PAGE, R_TLSLD_HINT, R_TLSIE_HINT>( + e)) return true; // These never do, except if the entire file is position dependent or if @@ -403,17 +406,7 @@ static bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, if (!absVal && !relE) return target->usesOnlyLowPageBits(type); - // Relative relocation to an absolute value. This is normally unrepresentable, - // but if the relocation refers to a weak undefined symbol, we allow it to - // resolve to the image base. This is a little strange, but it allows us to - // link function calls to such symbols. Normally such a call will be guarded - // with a comparison, which will load a zero from the GOT. - // Another special case is MIPS _gp_disp symbol which represents offset - // between start of a function and '_gp' value and defined as absolute just - // to simplify the code. assert(absVal && relE); - if (sym.isUndefWeak()) - return true; // We set the final symbols values for linker script defined symbols later. // They always can be computed as a link time constant. 
@@ -508,7 +501,6 @@ static void replaceWithDefined(Symbol &sym, SectionBase *sec, uint64_t value, sym.pltIndex = old.pltIndex; sym.gotIndex = old.gotIndex; sym.verdefIndex = old.verdefIndex; - sym.ppc64BranchltIndex = old.ppc64BranchltIndex; sym.exportDynamic = true; sym.isUsedInRegularObj = true; } @@ -538,7 +530,7 @@ static void replaceWithDefined(Symbol &sym, SectionBase *sec, uint64_t value, // // As you can see in this function, we create a copy relocation for the // dynamic linker, and the relocation contains not only symbol name but -// various other informtion about the symbol. So, such attributes become a +// various other information about the symbol. So, such attributes become a // part of the ABI. // // Note for application developers: I can give you a piece of advice if @@ -553,7 +545,7 @@ static void replaceWithDefined(Symbol &sym, SectionBase *sec, uint64_t value, // reserved in .bss unless you recompile the main program. That means they // are likely to overlap with other data that happens to be laid out next // to the variable in .bss. This kind of issue is sometimes very hard to -// debug. What's a solution? Instead of exporting a varaible V from a DSO, +// debug. What's a solution? Instead of exporting a variable V from a DSO, // define an accessor getV(). template <class ELFT> static void addCopyRelSymbol(SharedSymbol &ss) { // Copy relocation against zero-sized symbol doesn't make sense. @@ -696,13 +688,37 @@ struct UndefinedDiag { static std::vector<UndefinedDiag> undefs; +// Check whether the definition name def is a mangled function name that matches +// the reference name ref. 
+static bool canSuggestExternCForCXX(StringRef ref, StringRef def) { + llvm::ItaniumPartialDemangler d; + std::string name = def.str(); + if (d.partialDemangle(name.c_str())) + return false; + char *buf = d.getFunctionName(nullptr, nullptr); + if (!buf) + return false; + bool ret = ref == buf; + free(buf); + return ret; +} + // Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns // the suggested symbol, which is either in the symbol table, or in the same // file of sym. -static const Symbol *getAlternativeSpelling(const Undefined &sym) { - // Build a map of local defined symbols. +template <class ELFT> +static const Symbol *getAlternativeSpelling(const Undefined &sym, + std::string &pre_hint, + std::string &post_hint) { DenseMap<StringRef, const Symbol *> map; - if (sym.file && !isa<SharedFile>(sym.file)) { + if (auto *file = dyn_cast_or_null<ObjFile<ELFT>>(sym.file)) { + // If sym is a symbol defined in a discarded section, maybeReportDiscarded() + // will give an error. Don't suggest an alternative spelling. + if (file && sym.discardedSecIdx != 0 && + file->getSections()[sym.discardedSecIdx] == &InputSection::discarded) + return nullptr; + + // Build a map of local defined symbols. for (const Symbol *s : sym.file->getSymbols()) if (s->isLocal() && s->isDefined()) map.try_emplace(s->getName(), s); @@ -759,6 +775,48 @@ static const Symbol *getAlternativeSpelling(const Undefined &sym) { return s; } + // Case mismatch, e.g. Foo vs FOO. + for (auto &it : map) + if (name.equals_lower(it.first)) + return it.second; + for (Symbol *sym : symtab->symbols()) + if (!sym->isUndefined() && name.equals_lower(sym->getName())) + return sym; + + // The reference may be a mangled name while the definition is not. Suggest a + // missing extern "C". 
+ if (name.startswith("_Z")) { + std::string buf = name.str(); + llvm::ItaniumPartialDemangler d; + if (!d.partialDemangle(buf.c_str())) + if (char *buf = d.getFunctionName(nullptr, nullptr)) { + const Symbol *s = suggest(buf); + free(buf); + if (s) { + pre_hint = ": extern \"C\" "; + return s; + } + } + } else { + const Symbol *s = nullptr; + for (auto &it : map) + if (canSuggestExternCForCXX(name, it.first)) { + s = it.second; + break; + } + if (!s) + for (Symbol *sym : symtab->symbols()) + if (canSuggestExternCForCXX(name, sym->getName())) { + s = sym; + break; + } + if (s) { + pre_hint = " to declare "; + post_hint = " as extern \"C\"?"; + return s; + } + } + return nullptr; } @@ -804,13 +862,15 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef, msg += ("\n>>> referenced " + Twine(undef.locs.size() - i) + " more times") .str(); - if (correctSpelling) - if (const Symbol *corrected = - getAlternativeSpelling(cast<Undefined>(sym))) { - msg += "\n>>> did you mean: " + toString(*corrected); + if (correctSpelling) { + std::string pre_hint = ": ", post_hint; + if (const Symbol *corrected = getAlternativeSpelling<ELFT>( + cast<Undefined>(sym), pre_hint, post_hint)) { + msg += "\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint; if (corrected->file) msg += "\n>>> defined in: " + toString(corrected->file); } + } if (sym.getName().startswith("_ZTV")) msg += "\nthe vtable symbol may be undefined because the class is missing " @@ -950,10 +1010,10 @@ static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, expr, type); } -template <class ELFT, class GotPltSection> +template <class PltSection, class GotPltSection> static void addPltEntry(PltSection *plt, GotPltSection *gotPlt, RelocationBaseSection *rel, RelType type, Symbol &sym) { - plt->addEntry<ELFT>(sym); + plt->addEntry(sym); gotPlt->addEntry(sym); rel->addReloc( {type, gotPlt, sym.getGotPltOffset(), !sym.isPreemptible, &sym, 0}); @@ -1129,7 +1189,7 @@ static void 
processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, "' cannot be preempted; recompile with -fPIE" + getLocation(sec, sym, offset)); if (!sym.isInPlt()) - addPltEntry<ELFT>(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); + addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); if (!sym.isDefined()) replaceWithDefined( sym, in.plt, @@ -1190,8 +1250,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, const uint8_t *relocatedAddr = sec.data().begin() + rel.r_offset; RelExpr expr = target->getRelExpr(type, sym, relocatedAddr); - // Ignore "hint" relocations because they are only markers for relaxation. - if (oneof<R_HINT, R_NONE>(expr)) + // Ignore R_*_NONE and other marker relocations. + if (expr == R_NONE) return; // We can separate the small code model relocations into 2 categories: @@ -1222,9 +1282,9 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // // If we know that a PLT entry will be resolved within the same ELF module, we // can skip PLT access and directly jump to the destination function. For - // example, if we are linking a main exectuable, all dynamic symbols that can + // example, if we are linking a main executable, all dynamic symbols that can // be resolved within the executable will actually be resolved that way at - // runtime, because the main exectuable is always at the beginning of a search + // runtime, because the main executable is always at the beginning of a search // list. We can leverage that fact. if (!sym.isPreemptible && (!sym.isGnuIFunc() || config->zIfuncNoplt)) { if (expr == R_GOT_PC && !isAbsoluteValue(sym)) { @@ -1270,7 +1330,7 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (!sym.isGnuIFunc() || sym.isPreemptible) { // If a relocation needs PLT, we create PLT and GOTPLT slots for the symbol. 
if (needsPlt(expr) && !sym.isInPlt()) - addPltEntry<ELFT>(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); + addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); // Create a GOT slot if a relocation needs GOT. if (needsGot(expr)) { @@ -1340,8 +1400,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // that's really needed to create the IRELATIVE is the section and value, // so ideally we should just need to copy those. auto *directSym = make<Defined>(cast<Defined>(sym)); - addPltEntry<ELFT>(in.iplt, in.igotPlt, in.relaIplt, target->iRelativeRel, - *directSym); + addPltEntry(in.iplt, in.igotPlt, in.relaIplt, target->iRelativeRel, + *directSym); sym.pltIndex = directSym->pltIndex; } if (needsGot(expr)) { @@ -1354,13 +1414,9 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, } else if (!needsPlt(expr)) { // Make the ifunc's PLT entry canonical by changing the value of its // symbol to redirect all references to point to it. - unsigned entryOffset = sym.pltIndex * target->pltEntrySize; - if (config->zRetpolineplt) - entryOffset += target->pltHeaderSize; - auto &d = cast<Defined>(sym); d.section = in.iplt; - d.value = entryOffset; + d.value = sym.pltIndex * target->ipltEntrySize; d.size = 0; // It's important to set the symbol type here so that dynamic loaders // don't try to call the PLT as if it were an ifunc resolver. @@ -1555,7 +1611,7 @@ void ThunkCreator::mergeThunks(ArrayRef<OutputSection *> outputSections) { // those inserted in previous passes. Extract the Thunks created this // pass and order them in ascending outSecOff. 
std::vector<ThunkSection *> newThunks; - for (const std::pair<ThunkSection *, uint32_t> ts : isd->thunkSections) + for (std::pair<ThunkSection *, uint32_t> ts : isd->thunkSections) if (ts.second == pass) newThunks.push_back(ts.first); llvm::stable_sort(newThunks, @@ -1701,23 +1757,43 @@ static bool isThunkSectionCompatible(InputSection *source, return true; } +static int64_t getPCBias(RelType type) { + if (config->emachine != EM_ARM) + return 0; + switch (type) { + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + return 4; + default: + return 8; + } +} + std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec, Relocation &rel, uint64_t src) { std::vector<Thunk *> *thunkVec = nullptr; + int64_t addend = rel.addend + getPCBias(rel.type); - // We use (section, offset) pair to find the thunk position if possible so - // that we create only one thunk for aliased symbols or ICFed sections. + // We use a ((section, offset), addend) pair to find the thunk position if + // possible so that we create only one thunk for aliased symbols or ICFed + // sections. There may be multiple relocations sharing the same (section, + // offset + addend) pair. We may revert the relocation back to its original + // non-Thunk target, so we cannot fold offset + addend. 
if (auto *d = dyn_cast<Defined>(rel.sym)) if (!d->isInPlt() && d->section) - thunkVec = &thunkedSymbolsBySection[{d->section->repl, d->value}]; + thunkVec = &thunkedSymbolsBySectionAndAddend[{ + {d->section->repl, d->value}, addend}]; if (!thunkVec) - thunkVec = &thunkedSymbols[rel.sym]; + thunkVec = &thunkedSymbols[{rel.sym, addend}]; // Check existing Thunks for Sym to see if they can be reused for (Thunk *t : *thunkVec) if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) && t->isCompatibleWith(*isec, rel) && - target->inBranchRange(rel.type, src, t->getThunkTargetSym()->getVA())) + target->inBranchRange(rel.type, src, + t->getThunkTargetSym()->getVA(rel.addend) + + getPCBias(rel.type))) return std::make_pair(t, false); // No existing compatible Thunk in range, create a new one @@ -1732,9 +1808,13 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec, // relocation back to its original non-Thunk target. bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { if (Thunk *t = thunks.lookup(rel.sym)) { - if (target->inBranchRange(rel.type, src, rel.sym->getVA())) + if (target->inBranchRange(rel.type, src, + rel.sym->getVA(rel.addend) + getPCBias(rel.type))) return true; rel.sym = &t->destination; + // TODO Restore addend on all targets. + if (config->emachine == EM_AARCH64 || config->emachine == EM_PPC64) + rel.addend = t->addend; if (rel.sym->isInPlt()) rel.expr = toPlt(rel.expr); } @@ -1790,7 +1870,7 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { continue; if (!target->needsThunk(rel.expr, rel.type, isec->file, src, - *rel.sym)) + *rel.sym, rel.addend)) continue; Thunk *t; @@ -1812,9 +1892,15 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) { rel.sym = t->getThunkTargetSym(); rel.expr = fromPlt(rel.expr); + // On AArch64 and PPC64, a jump/call relocation may be encoded as + // STT_SECTION + non-zero addend, clear the addend after + // redirection. 
+ // // The addend of R_PPC_PLTREL24 should be ignored after changing to // R_PC. - if (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24) + if (config->emachine == EM_AARCH64 || + config->emachine == EM_PPC64 || + (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24)) rel.addend = 0; } diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index befe15b8f3b9..bfec1e628851 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -40,7 +40,6 @@ enum RelExpr { R_GOTPLT, R_GOTPLTREL, R_GOTREL, - R_HINT, R_NEG_TLS, R_NONE, R_PC, @@ -115,8 +114,6 @@ template <class ELFT> void scanRelocations(InputSectionBase &); template <class ELFT> void reportUndefinedSymbols(); -void addIRelativeRelocs(); - class ThunkSection; class Thunk; struct InputSectionDescription; @@ -150,10 +147,17 @@ private: bool normalizeExistingThunk(Relocation &rel, uint64_t src); - // Record all the available Thunks for a Symbol - llvm::DenseMap<std::pair<SectionBase *, uint64_t>, std::vector<Thunk *>> - thunkedSymbolsBySection; - llvm::DenseMap<Symbol *, std::vector<Thunk *>> thunkedSymbols; + // Record all the available Thunks for a (Symbol, addend) pair, where Symbol + // is represented as a (section, offset) pair. There may be multiple + // relocations sharing the same (section, offset + addend) pair. We may revert + // a relocation back to its original non-Thunk target, and restore the + // original addend, so we cannot fold offset + addend. A nested pair is used + // because DenseMapInfo is not specialized for std::tuple. + llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>, + std::vector<Thunk *>> + thunkedSymbolsBySectionAndAddend; + llvm::DenseMap<std::pair<Symbol *, int64_t>, std::vector<Thunk *>> + thunkedSymbols; // Find a Thunk from the Thunks symbol definition, we can use this to find // the Thunk from a relocation to the Thunks symbol definition. 
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 5f6008ef908b..f7a8a99cf8f9 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -215,7 +215,7 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, } void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) { - // Exact matching takes precendence over fuzzy matching, + // Exact matching takes precedence over fuzzy matching, // so we set a version to a symbol only if no version has been assigned // to the symbol. This behavior is compatible with GNU. for (Symbol *sym : findAllByVersion(ver)) diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index d3be0cb6450f..507af8d2be75 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -32,15 +32,19 @@ namespace elf { // add*() functions, which are called by input files as they are parsed. There // is one add* function per symbol type. class SymbolTable { -public: - void wrap(Symbol *sym, Symbol *real, Symbol *wrap); + struct FilterOutPlaceholder { + bool operator()(Symbol *S) const { return !S->isPlaceholder(); } + }; + using iterator = llvm::filter_iterator<std::vector<Symbol *>::const_iterator, + FilterOutPlaceholder>; - void forEachSymbol(llvm::function_ref<void(Symbol *)> fn) { - for (Symbol *sym : symVector) - if (!sym->isPlaceholder()) - fn(sym); +public: + llvm::iterator_range<iterator> symbols() const { + return llvm::make_filter_range(symVector, FilterOutPlaceholder()); } + void wrap(Symbol *sym, Symbol *real, Symbol *wrap); + Symbol *insert(StringRef name); Symbol *addSymbol(const Symbol &newSym); diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index c0cba21cfe8d..f0f6121009a5 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -162,15 +162,12 @@ uint64_t Symbol::getGotPltOffset() const { return (pltIndex + target->gotPltHeaderEntriesNum) * config->wordsize; } -uint64_t Symbol::getPPC64LongBranchOffset() const { - assert(ppc64BranchltIndex != 
0xffff); - return ppc64BranchltIndex * config->wordsize; -} - uint64_t Symbol::getPltVA() const { - PltSection *plt = isInIplt ? in.iplt : in.plt; - uint64_t outVA = - plt->getVA() + plt->headerSize + pltIndex * target->pltEntrySize; + uint64_t outVA = isInIplt + ? in.iplt->getVA() + pltIndex * target->ipltEntrySize + : in.plt->getVA() + in.plt->headerSize + + pltIndex * target->pltEntrySize; + // While linking microMIPS code PLT code are always microMIPS // code. Set the less-significant bit to track that fact. // See detailed comment in the `getSymVA` function. @@ -179,12 +176,6 @@ uint64_t Symbol::getPltVA() const { return outVA; } -uint64_t Symbol::getPPC64LongBranchTableVA() const { - assert(ppc64BranchltIndex != 0xffff); - return in.ppc64LongBranchTarget->getVA() + - ppc64BranchltIndex * config->wordsize; -} - uint64_t Symbol::getSize() const { if (const auto *dr = dyn_cast<Defined>(this)) return dr->size; @@ -286,13 +277,10 @@ bool Symbol::includeInDynsym() const { return false; if (computeBinding() == STB_LOCAL) return false; + if (!isDefined() && !isCommon()) + return true; - // If a PIE binary was not linked against any shared libraries, then we can - // safely drop weak undef symbols from .dynsym. - if (isUndefWeak() && config->pie && sharedFiles.empty()) - return false; - - return isUndefined() || isShared() || exportDynamic || inDynamicList; + return exportDynamic || inDynamicList; } // Print out a log message for --trace-symbol. @@ -342,6 +330,34 @@ void maybeWarnUnorderableSymbol(const Symbol *sym) { report(": unable to order discarded symbol: "); } +// Returns true if a symbol can be replaced at load-time by a symbol +// with the same name defined in other ELF executable or DSO. +bool computeIsPreemptible(const Symbol &sym) { + assert(!sym.isLocal()); + + // Only symbols with default visibility that appear in dynsym can be + // preempted. Symbols with protected visibility cannot be preempted. 
+ if (!sym.includeInDynsym() || sym.visibility != STV_DEFAULT) + return false; + + // At this point copy relocations have not been created yet, so any + // symbol that is not defined locally is preemptible. + if (!sym.isDefined()) + return true; + + if (!config->shared) + return false; + + // If the dynamic list is present, it specifies preemptable symbols in a DSO. + if (config->hasDynamicList) + return sym.inDynamicList; + + // -Bsymbolic means that definitions are not preempted. + if (config->bsymbolic || (config->bsymbolicFunctions && sym.isFunc())) + return false; + return true; +} + static uint8_t getMinVisibility(uint8_t va, uint8_t vb) { if (va == STV_DEFAULT) return vb; @@ -603,7 +619,18 @@ void Symbol::resolveCommon(const CommonSymbol &other) { return; if (cmp > 0) { - replace(other); + if (auto *s = dyn_cast<SharedSymbol>(this)) { + // Increase st_size if the shared symbol has a larger st_size. The shared + // symbol may be created from common symbols. The fact that some object + // files were linked into a shared object first should not change the + // regular rule that picks the largest st_size. + uint64_t size = s->size; + replace(other); + if (size > cast<CommonSymbol>(this)->size) + cast<CommonSymbol>(this)->size = size; + } else { + replace(other); + } return; } @@ -644,6 +671,12 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) { } void Symbol::resolveShared(const SharedSymbol &other) { + if (isCommon()) { + // See the comment in resolveCommon() above. + if (other.size > cast<CommonSymbol>(this)->size) + cast<CommonSymbol>(this)->size = other.size; + return; + } if (visibility == STV_DEFAULT && (isUndefined() || isLazy())) { // An undefined symbol with non default visibility must be satisfied // in the same DSO. diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index d43568fe295c..ac606198afd8 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -87,9 +87,6 @@ public: // Version definition index. 
uint16_t versionId; - // An index into the .branch_lt section on PPC64. - uint16_t ppc64BranchltIndex = -1; - // Symbol binding. This is not overwritten by replace() to track // changes during resolution. In particular: // - An undefined weak is still weak when it resolves to a shared library. @@ -105,13 +102,13 @@ public: // Symbol visibility. This is the computed minimum visibility of all // observed non-DSO symbols. - unsigned visibility : 2; + uint8_t visibility : 2; // True if the symbol was used for linking and thus need to be added to the // output file's symbol table. This is true for all symbols except for // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that // are unreferenced except by other bitcode objects. - unsigned isUsedInRegularObj : 1; + uint8_t isUsedInRegularObj : 1; // Used by a Defined symbol with protected or default visibility, to record // whether it is required to be exported into .dynsym. This is set when any of @@ -121,25 +118,25 @@ public: // - If -shared or --export-dynamic is specified, any symbol in an object // file/bitcode sets this property, unless suppressed by LTO // canBeOmittedFromSymbolTable(). - unsigned exportDynamic : 1; + uint8_t exportDynamic : 1; // True if the symbol is in the --dynamic-list file. A Defined symbol with // protected or default visibility with this property is required to be // exported into .dynsym. - unsigned inDynamicList : 1; + uint8_t inDynamicList : 1; // False if LTO shouldn't inline whatever this symbol points to. If a symbol // is overwritten after LTO, LTO shouldn't inline the symbol because it // doesn't know the final contents of the symbol. - unsigned canInline : 1; + uint8_t canInline : 1; // Used by Undefined and SharedSymbol to track if there has been at least one // undefined reference to the symbol. The binding may change to STB_WEAK if // the first undefined reference from a non-shared object is weak. 
- unsigned referenced : 1; + uint8_t referenced : 1; // True if this symbol is specified by --trace-symbol option. - unsigned traced : 1; + uint8_t traced : 1; inline void replace(const Symbol &newSym); @@ -181,7 +178,6 @@ public: bool isInGot() const { return gotIndex != -1U; } bool isInPlt() const { return pltIndex != -1U; } - bool isInPPC64Branchlt() const { return ppc64BranchltIndex != 0xffff; } uint64_t getVA(int64_t addend = 0) const; @@ -190,8 +186,6 @@ public: uint64_t getGotPltOffset() const; uint64_t getGotPltVA() const; uint64_t getPltVA() const; - uint64_t getPPC64LongBranchTableVA() const; - uint64_t getPPC64LongBranchOffset() const; uint64_t getSize() const; OutputSection *getOutputSection() const; @@ -248,28 +242,28 @@ protected: public: // True the symbol should point to its PLT entry. // For SharedSymbol only. - unsigned needsPltAddr : 1; + uint8_t needsPltAddr : 1; // True if this symbol is in the Iplt sub-section of the Plt and the Igot // sub-section of the .got.plt or .got. - unsigned isInIplt : 1; + uint8_t isInIplt : 1; // True if this symbol needs a GOT entry and its GOT entry is actually in // Igot. This will be true only for certain non-preemptible ifuncs. - unsigned gotInIgot : 1; + uint8_t gotInIgot : 1; // True if this symbol is preemptible at load time. - unsigned isPreemptible : 1; + uint8_t isPreemptible : 1; // True if an undefined or shared symbol is used from a live section. - unsigned used : 1; + uint8_t used : 1; // True if a call to this symbol needs to be followed by a restore of the // PPC64 toc pointer. - unsigned needsTocRestore : 1; + uint8_t needsTocRestore : 1; // True if this symbol is defined by a linker script. - unsigned scriptDefined : 1; + uint8_t scriptDefined : 1; // The partition whose dynamic symbol table contains this symbol's definition. uint8_t partition = 1; @@ -310,7 +304,7 @@ public: // definitions for this particular case. // // Common symbols represent variable definitions without initializations. 
-// The compiler creates common symbols when it sees varaible definitions +// The compiler creates common symbols when it sees variable definitions // without initialization (you can suppress this behavior and let the // compiler create a regular defined symbol by -fno-common). // @@ -560,6 +554,8 @@ void Symbol::replace(const Symbol &newSym) { } void maybeWarnUnorderableSymbol(const Symbol *sym); +bool computeIsPreemptible(const Symbol &sym); + } // namespace elf } // namespace lld diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index ff35bb7bd10c..550a5b38b89b 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -298,8 +298,8 @@ static size_t getHashSize() { // sets is empty, or some input files didn't have .note.gnu.property sections), // we don't create this section. GnuPropertySection::GnuPropertySection() - : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, 4, - ".note.gnu.property") {} + : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, + config->wordsize, ".note.gnu.property") {} void GnuPropertySection::writeTo(uint8_t *buf) { uint32_t featureAndType = config->emachine == EM_AARCH64 @@ -893,7 +893,7 @@ void MipsGotSection::build() { std::swap(gots, mergedGots); // Reduce number of "reloc-only" entries in the primary GOT - // by substracting "global" entries exist in the primary GOT. + // by subtracting "global" entries in the primary GOT. primGot = &gots.front(); primGot->relocs.remove_if([&](const std::pair<Symbol *, size_t> &p) { return primGot->global.count(p.first); @@ -1055,7 +1055,7 @@ void MipsGotSection::writeTo(uint8_t *buf) { // Write VA to the primary GOT only. For secondary GOTs that // will be done by REL32 dynamic relocations. 
if (&g == &gots.front()) - for (const std::pair<const Symbol *, size_t> &p : g.global) + for (const std::pair<Symbol *, size_t> &p : g.global) write(p.second, p.first, 0); for (const std::pair<Symbol *, size_t> &p : g.relocs) write(p.second, p.first, 0); @@ -1079,7 +1079,7 @@ void MipsGotSection::writeTo(uint8_t *buf) { // On PowerPC the .plt section is used to hold the table of function addresses // instead of the .got.plt, and the type is SHT_NOBITS similar to a .bss // section. I don't know why we have a BSS style type for the section but it is -// consitent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI. +// consistent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI. GotPltSection::GotPltSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize, ".got.plt") { @@ -1333,7 +1333,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { if (dtFlags1) addInt(DT_FLAGS_1, dtFlags1); - // DT_DEBUG is a pointer to debug informaion used by debuggers at runtime. We + // DT_DEBUG is a pointer to debug information used by debuggers at runtime. We // need it for each process, so we don't write it for DSOs. The loader writes // the pointer into this entry. // @@ -1378,7 +1378,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { // iplt relocations. It is possible to have only iplt relocations in the // output. In that case relaPlt is empty and have zero offset, the same offset // as relaIplt has. And we still want to emit proper dynamic tags for that - // case, so here we always use relaPlt as marker for the begining of + // case, so here we always use relaPlt as marker for the beginning of // .rel[a].plt section. 
if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) { addInSec(DT_JMPREL, in.relaPlt); @@ -1437,14 +1437,13 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { addSym(DT_FINI, b); } - bool hasVerNeed = SharedFile::vernauxNum != 0; - if (hasVerNeed || part.verDef) + if (part.verSym && part.verSym->isNeeded()) addInSec(DT_VERSYM, part.verSym); - if (part.verDef) { + if (part.verDef && part.verDef->isLive()) { addInSec(DT_VERDEF, part.verDef); addInt(DT_VERDEFNUM, getVerDefNum()); } - if (hasVerNeed) { + if (part.verNeed && part.verNeed->isNeeded()) { addInSec(DT_VERNEED, part.verNeed); unsigned needNum = 0; for (SharedFile *f : sharedFiles) @@ -2265,7 +2264,7 @@ size_t SymtabShndxSection::getSize() const { // .gnu.hash has a bloom filter in addition to a hash table to skip // DSOs very quickly. If you are sure that your dynamic linker knows // about .gnu.hash, you want to specify -hash-style=gnu. Otherwise, a -// safe bet is to specify -hash-style=both for backward compatibilty. +// safe bet is to specify -hash-style=both for backward compatibility. GnuHashTableSection::GnuHashTableSection() : SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, config->wordsize, ".gnu.hash") { } @@ -2443,16 +2442,21 @@ void HashTableSection::writeTo(uint8_t *buf) { } } -// On PowerPC64 the lazy symbol resolvers go into the `global linkage table` -// in the .glink section, rather then the typical .plt section. -PltSection::PltSection(bool isIplt) - : SyntheticSection( - SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, - (config->emachine == EM_PPC || config->emachine == EM_PPC64) - ? ".glink" - : ".plt"), - headerSize(!isIplt || config->zRetpolineplt ? target->pltHeaderSize : 0), - isIplt(isIplt) { +PltSection::PltSection() + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"), + headerSize(target->pltHeaderSize) { + // On PowerPC, this section contains lazy symbol resolvers. 
+ if (config->emachine == EM_PPC || config->emachine == EM_PPC64) { + name = ".glink"; + alignment = 4; + } + + // On x86 when IBT is enabled, this section contains the second PLT (lazy + // symbol resolvers). + if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && + (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) + name = ".plt.sec"; + // The PLT needs to be writable on SPARC as the dynamic linker will // modify the instructions in the PLT entries. if (config->emachine == EM_SPARCV9) @@ -2465,28 +2469,18 @@ void PltSection::writeTo(uint8_t *buf) { return; } - // At beginning of PLT or retpoline IPLT, we have code to call the dynamic + // At beginning of PLT, we have code to call the dynamic // linker to resolve dynsyms at runtime. Write such code. - if (headerSize) - target->writePltHeader(buf); + target->writePltHeader(buf); size_t off = headerSize; - RelocationBaseSection *relSec = isIplt ? in.relaIplt : in.relaPlt; - - // The IPlt is immediately after the Plt, account for this in relOff - size_t pltOff = isIplt ? in.plt->getSize() : 0; - - for (size_t i = 0, e = entries.size(); i != e; ++i) { - const Symbol *b = entries[i]; - unsigned relOff = relSec->entsize * i + pltOff; - uint64_t got = b->getGotPltVA(); - uint64_t plt = this->getVA() + off; - target->writePlt(buf + off, got, plt, b->pltIndex, relOff); + for (const Symbol *sym : entries) { + target->writePlt(buf + off, *sym, getVA() + off); off += target->pltEntrySize; } } -template <class ELFT> void PltSection::addEntry(Symbol &sym) { +void PltSection::addEntry(Symbol &sym) { sym.pltIndex = entries.size(); entries.push_back(&sym); } @@ -2495,12 +2489,15 @@ size_t PltSection::getSize() const { return headerSize + entries.size() * target->pltEntrySize; } -// Some architectures such as additional symbols in the PLT section. For -// example ARM uses mapping symbols to aid disassembly +bool PltSection::isNeeded() const { + // For -z retpolineplt, .iplt needs the .plt header. 
+ return !entries.empty() || (config->zRetpolineplt && in.iplt->isNeeded()); +} + +// Used by ARM to add mapping symbols in the PLT section, which aid +// disassembly. void PltSection::addSymbols() { - // The PLT may have symbols defined for the Header, the IPLT has no header - if (!isIplt) - target->addPltHeaderSymbols(*this); + target->addPltHeaderSymbols(*this); size_t off = headerSize; for (size_t i = 0; i < entries.size(); ++i) { @@ -2509,6 +2506,110 @@ void PltSection::addSymbols() { } } +IpltSection::IpltSection() + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".iplt") { + if (config->emachine == EM_PPC || config->emachine == EM_PPC64) { + name = ".glink"; + alignment = 4; + } +} + +void IpltSection::writeTo(uint8_t *buf) { + uint32_t off = 0; + for (const Symbol *sym : entries) { + target->writeIplt(buf + off, *sym, getVA() + off); + off += target->ipltEntrySize; + } +} + +size_t IpltSection::getSize() const { + return entries.size() * target->ipltEntrySize; +} + +void IpltSection::addEntry(Symbol &sym) { + sym.pltIndex = entries.size(); + entries.push_back(&sym); +} + +// ARM uses mapping symbols to aid disassembly. +void IpltSection::addSymbols() { + size_t off = 0; + for (size_t i = 0, e = entries.size(); i != e; ++i) { + target->addPltSymbols(*this, off); + off += target->pltEntrySize; + } +} + +// This is an x86-only extra PLT section and used only when a security +// enhancement feature called CET is enabled. In this comment, I'll explain what +// the feature is and why we have two PLT sections if CET is enabled. +// +// So, what does CET do? CET introduces a new restriction to indirect jump +// instructions. CET works this way. Assume that CET is enabled. Then, if you +// execute an indirect jump instruction, the processor verifies that a special +// "landing pad" instruction (which is actually a repurposed NOP instruction and +// now called "endbr32" or "endbr64") is at the jump target. 
If the jump target
+// does not start with that instruction, the processor raises an exception
+// instead of continuing executing code.
+//
+// If CET is enabled, the compiler emits endbr to all locations where indirect
+// jumps may jump to.
+//
+// This mechanism makes it extremely hard to transfer control to the middle of
+// a function that is not supposed to be an indirect jump target, preventing
+// certain types of attacks such as ROP or JOP.
+//
+// Note that the processors in the market as of 2019 don't actually support the
+// feature. Only the spec is available at the moment.
+//
+// Now, I'll explain why we have this extra PLT section for CET.
+//
+// Since you can indirectly jump to a PLT entry, we have to make PLT entries
+// start with endbr. The problem is there's no extra space for endbr (which is 4
+// bytes long), as the PLT entry is only 16 bytes long and all bytes are already
+// used.
+//
+// In order to deal with the issue, we split a PLT entry into two PLT entries.
+// Remember that each PLT entry contains code to jump to an address read from
+// .got.plt AND code to resolve a dynamic symbol lazily. With the 2-PLT scheme,
+// the former code is written to .plt.sec, and the latter code is written to
+// .plt.
+//
+// Lazy symbol resolution in the 2-PLT scheme works in the usual way, except
+// that the regular .plt is now called .plt.sec and .plt is repurposed to
+// contain only code for lazy symbol resolution.
+//
+// In other words, this is how the 2-PLT scheme works. Application code is
+// supposed to jump to .plt.sec to call an external function. Each .plt.sec
+// entry contains code to read an address from a corresponding .got.plt entry
+// and jump to that address. Addresses in .got.plt initially point to .plt, so
+// when an application calls an external function for the first time, the
+// control is transferred to a function that resolves a symbol name from
+// external shared object files.
That function then rewrites a .got.plt entry +// with a resolved address, so that the subsequent function calls directly jump +// to a desired location from .plt.sec. +// +// There is an open question as to whether the 2-PLT scheme was desirable or +// not. We could have simply extended the PLT entry size to 32-bytes to +// accommodate endbr, and that scheme would have been much simpler than the +// 2-PLT scheme. One reason to split PLT was, by doing that, we could keep hot +// code (.plt.sec) from cold code (.plt). But as far as I know no one proved +// that the optimization actually makes a difference. +// +// That said, the 2-PLT scheme is a part of the ABI, debuggers and other tools +// depend on it, so we implement the ABI. +IBTPltSection::IBTPltSection() + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt") {} + +void IBTPltSection::writeTo(uint8_t *buf) { + target->writeIBTPlt(buf, in.plt->getNumEntries()); +} + +size_t IBTPltSection::getSize() const { + // 16 is the header size of .plt. + return 16 + in.plt->getNumEntries() * target->pltEntrySize; +} + // The string hash function for .gdb_index. 
static uint32_t computeGdbHash(StringRef s) { uint32_t h = 0; @@ -2936,7 +3037,8 @@ void VersionTableSection::writeTo(uint8_t *buf) { } bool VersionTableSection::isNeeded() const { - return getPartition().verDef || getPartition().verNeed->isNeeded(); + return isLive() && + (getPartition().verDef || getPartition().verNeed->isNeeded()); } void addVerneed(Symbol *ss) { @@ -3024,7 +3126,7 @@ template <class ELFT> size_t VersionNeedSection<ELFT>::getSize() const { } template <class ELFT> bool VersionNeedSection<ELFT>::isNeeded() const { - return SharedFile::vernauxNum != 0; + return isLive() && SharedFile::vernauxNum != 0; } void MergeSyntheticSection::addSection(MergeInputSection *ms) { @@ -3164,12 +3266,10 @@ static bool isValidExidxSectionDep(InputSection *isec) { bool ARMExidxSyntheticSection::addSection(InputSection *isec) { if (isec->type == SHT_ARM_EXIDX) { - if (InputSection* dep = isec->getLinkOrderDep()) - if (isValidExidxSectionDep(dep)) { + if (InputSection *dep = isec->getLinkOrderDep()) + if (isValidExidxSectionDep(dep)) exidxSections.push_back(isec); - return true; - } - return false; + return true; } if (isValidExidxSectionDep(isec)) { @@ -3357,6 +3457,17 @@ ThunkSection::ThunkSection(OutputSection *os, uint64_t off) this->outSecOff = off; } +// When the errata patching is on, we round the size up to a 4 KiB +// boundary. This limits the effect that adding Thunks has on the addresses +// of the program modulo 4 KiB. As the errata patching is sensitive to address +// modulo 4 KiB this can prevent further patches from being needed due to +// Thunk insertion. +size_t ThunkSection::getSize() const { + if (config->fixCortexA53Errata843419 || config->fixCortexA8) + return alignTo(size, 4096); + return size; +} + void ThunkSection::addThunk(Thunk *t) { thunks.push_back(t); t->addSymbols(*this); @@ -3428,10 +3539,19 @@ PPC64LongBranchTargetSection::PPC64LongBranchTargetSection() config->isPic ? 
SHT_NOBITS : SHT_PROGBITS, 8, ".branch_lt") {} -void PPC64LongBranchTargetSection::addEntry(Symbol &sym) { - assert(sym.ppc64BranchltIndex == 0xffff); - sym.ppc64BranchltIndex = entries.size(); - entries.push_back(&sym); +uint64_t PPC64LongBranchTargetSection::getEntryVA(const Symbol *sym, + int64_t addend) { + return getVA() + entry_index.find({sym, addend})->second * 8; +} + +Optional<uint32_t> PPC64LongBranchTargetSection::addEntry(const Symbol *sym, + int64_t addend) { + auto res = + entry_index.try_emplace(std::make_pair(sym, addend), entries.size()); + if (!res.second) + return None; + entries.emplace_back(sym, addend); + return res.first->second; } size_t PPC64LongBranchTargetSection::getSize() const { @@ -3445,12 +3565,14 @@ void PPC64LongBranchTargetSection::writeTo(uint8_t *buf) { if (config->isPic) return; - for (const Symbol *sym : entries) { + for (auto entry : entries) { + const Symbol *sym = entry.first; + int64_t addend = entry.second; assert(sym->getVA()); // Need calls to branch to the local entry-point since a long-branch // must be a local-call. - write64(buf, - sym->getVA() + getPPC64GlobalEntryToLocalEntryOffset(sym->stOther)); + write64(buf, sym->getVA(addend) + + getPPC64GlobalEntryToLocalEntryOffset(sym->stOther)); buf += 8; } } @@ -3460,7 +3582,7 @@ bool PPC64LongBranchTargetSection::isNeeded() const { // is too early to determine if this section will be empty or not. We need // Finalized to keep the section alive until after thunk creation. Finalized // only gets set to true once `finalizeSections()` is called after thunk - // creation. Becuase of this, if we don't create any long-branch thunks we end + // creation. Because of this, if we don't create any long-branch thunks we end // up with an empty .branch_lt section in the binary. 
return !finalized || !entries.empty(); } @@ -3601,11 +3723,6 @@ template void splitSections<ELF32BE>(); template void splitSections<ELF64LE>(); template void splitSections<ELF64BE>(); -template void PltSection::addEntry<ELF32LE>(Symbol &Sym); -template void PltSection::addEntry<ELF32BE>(Symbol &Sym); -template void PltSection::addEntry<ELF64LE>(Symbol &Sym); -template void PltSection::addEntry<ELF64BE>(Symbol &Sym); - template class MipsAbiFlagsSection<ELF32LE>; template class MipsAbiFlagsSection<ELF32BE>; template class MipsAbiFlagsSection<ELF64LE>; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index d592dcb84e12..f0a598dda51d 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -662,24 +662,55 @@ private: size_t size = 0; }; -// The PltSection is used for both the Plt and Iplt. The former usually has a -// header as its first entry that is used at run-time to resolve lazy binding. -// The latter is used for GNU Ifunc symbols, that will be subject to a -// Target->IRelativeRel. +// Used for PLT entries. It usually has a PLT header for lazy binding. Each PLT +// entry is associated with a JUMP_SLOT relocation, which may be resolved lazily +// at runtime. +// +// On PowerPC, this section contains lazy symbol resolvers. A branch instruction +// jumps to a PLT call stub, which will then jump to the target (BIND_NOW) or a +// lazy symbol resolver. +// +// On x86 when IBT is enabled, this section (.plt.sec) contains PLT call stubs. +// A call instruction jumps to a .plt.sec entry, which will then jump to the +// target (BIND_NOW) or a .plt entry. 
class PltSection : public SyntheticSection { public: - PltSection(bool isIplt); + PltSection(); void writeTo(uint8_t *buf) override; size_t getSize() const override; - bool isNeeded() const override { return !entries.empty(); } + bool isNeeded() const override; void addSymbols(); - template <class ELFT> void addEntry(Symbol &sym); + void addEntry(Symbol &sym); + size_t getNumEntries() const { return entries.size(); } - size_t headerSize; + size_t headerSize = 0; private: std::vector<const Symbol *> entries; - bool isIplt; +}; + +// Used for non-preemptible ifuncs. It does not have a header. Each entry is +// associated with an IRELATIVE relocation, which will be resolved eagerly at +// runtime. PltSection can only contain entries associated with JUMP_SLOT +// relocations, so IPLT entries are in a separate section. +class IpltSection final : public SyntheticSection { + std::vector<const Symbol *> entries; + +public: + IpltSection(); + void writeTo(uint8_t *buf) override; + size_t getSize() const override; + bool isNeeded() const override { return !entries.empty(); } + void addSymbols(); + void addEntry(Symbol &sym); +}; + +// This is x86-only. +class IBTPltSection : public SyntheticSection { +public: + IBTPltSection(); + void writeTo(uint8_t *Buf) override; + size_t getSize() const override; }; class GdbIndexSection final : public SyntheticSection { @@ -1033,7 +1064,7 @@ public: // Thunk defines a symbol in this InputSection that can be used as target // of a relocation void addThunk(Thunk *t); - size_t getSize() const override { return size; } + size_t getSize() const override; void writeTo(uint8_t *buf) override; InputSection *getTargetInputSection() const; bool assignOffsets(); @@ -1055,21 +1086,23 @@ public: }; // This section is used to store the addresses of functions that are called -// in range-extending thunks on PowerPC64. When producing position dependant +// in range-extending thunks on PowerPC64. 
When producing position dependent // code the addresses are link-time constants and the table is written out to -// the binary. When producing position-dependant code the table is allocated and +// the binary. When producing position-dependent code the table is allocated and // filled in by the dynamic linker. class PPC64LongBranchTargetSection final : public SyntheticSection { public: PPC64LongBranchTargetSection(); - void addEntry(Symbol &sym); + uint64_t getEntryVA(const Symbol *sym, int64_t addend); + llvm::Optional<uint32_t> addEntry(const Symbol *sym, int64_t addend); size_t getSize() const override; void writeTo(uint8_t *buf) override; bool isNeeded() const override; void finalizeContents() override { finalized = true; } private: - std::vector<const Symbol *> entries; + std::vector<std::pair<const Symbol *, int64_t>> entries; + llvm::DenseMap<std::pair<const Symbol *, int64_t>, uint32_t> entry_index; bool finalized = false; }; @@ -1160,8 +1193,9 @@ struct InStruct { SyntheticSection *partEnd; SyntheticSection *partIndex; PltSection *plt; - PltSection *iplt; + IpltSection *iplt; PPC32Got2Section *ppc32Got2; + IBTPltSection *ibtPlt; RelocationBaseSection *relaPlt; RelocationBaseSection *relaIplt; StringTableSection *shStrTab; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 024e0cfec27b..70a68fd8db9e 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -28,6 +28,7 @@ #include "OutputSections.h" #include "SymbolTable.h" #include "Symbols.h" +#include "SyntheticSections.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Object/ELF.h" @@ -91,15 +92,20 @@ TargetInfo *getTarget() { } template <class ELFT> static ErrorPlace getErrPlace(const uint8_t *loc) { - if (!Out::bufferStart) - return {}; - + assert(loc != nullptr); for (InputSectionBase *d : inputSections) { auto *isec = cast<InputSection>(d); if (!isec->getParent()) continue; - uint8_t *isecLoc = Out::bufferStart + isec->getParent()->offset + isec->outSecOff; + const uint8_t *isecLoc = + 
Out::bufferStart + ? (Out::bufferStart + isec->getParent()->offset + isec->outSecOff) + : isec->data().data(); + if (isecLoc == nullptr) { + assert(isa<SyntheticSection>(isec) && "No data but not synthetic?"); + continue; + } if (isecLoc <= loc && loc < isecLoc + isec->getSize()) return {isec, isec->template getLocation<ELFT>(loc - isecLoc) + ": "}; } @@ -130,7 +136,8 @@ int64_t TargetInfo::getImplicitAddend(const uint8_t *buf, RelType type) const { bool TargetInfo::usesOnlyLowPageBits(RelType type) const { return false; } bool TargetInfo::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s) const { + uint64_t branchAddr, const Symbol &s, + int64_t a) const { return false; } @@ -143,10 +150,6 @@ bool TargetInfo::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { return true; } -void TargetInfo::writeIgotPlt(uint8_t *buf, const Symbol &s) const { - writeGotPlt(buf, s); -} - RelExpr TargetInfo::adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const { return expr; diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index effa6001f6d9..949a7bfdf64b 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -32,7 +32,7 @@ public: virtual void writeGotPltHeader(uint8_t *buf) const {} virtual void writeGotHeader(uint8_t *buf) const {} virtual void writeGotPlt(uint8_t *buf, const Symbol &s) const {}; - virtual void writeIgotPlt(uint8_t *buf, const Symbol &s) const; + virtual void writeIgotPlt(uint8_t *buf, const Symbol &s) const {} virtual int64_t getImplicitAddend(const uint8_t *buf, RelType type) const; virtual int getTlsGdRelaxSkip(RelType type) const { return 1; } @@ -41,9 +41,14 @@ public: // they are called. This function writes that code. 
virtual void writePltHeader(uint8_t *buf) const {} - virtual void writePlt(uint8_t *buf, uint64_t gotEntryAddr, - uint64_t pltEntryAddr, int32_t index, - unsigned relOff) const {} + virtual void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const {} + virtual void writeIplt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + // All but PPC32 and PPC64 use the same format for .plt and .iplt entries. + writePlt(buf, sym, pltEntryAddr); + } + virtual void writeIBTPlt(uint8_t *buf, size_t numEntries) const {} virtual void addPltHeaderSymbols(InputSection &isec) const {} virtual void addPltSymbols(InputSection &isec, uint64_t off) const {} @@ -58,7 +63,7 @@ public: // targeting S. virtual bool needsThunk(RelExpr expr, RelType relocType, const InputFile *file, uint64_t branchAddr, - const Symbol &s) const; + const Symbol &s, int64_t a) const; // On systems with range extensions we place collections of Thunks at // regular spacings that enable the majority of branches reach the Thunks. @@ -102,6 +107,7 @@ public: RelType tlsOffsetRel; unsigned pltEntrySize; unsigned pltHeaderSize; + unsigned ipltEntrySize; // At least on x86_64 positions 1 and 2 are used by the first plt entry // to support lazy loading. @@ -131,8 +137,8 @@ public: protected: // On FreeBSD x86_64 the first page cannot be mmaped. - // On Linux that is controled by vm.mmap_min_addr. At least on some x86_64 - // installs that is 65536, so the first 15 pages cannot be used. + // On Linux this is controlled by vm.mmap_min_addr. At least on some x86_64 + // installs this is set to 65536, so the first 15 pages cannot be used. // Given that, the smallest value that can be used in here is 0x10000. 
uint64_t defaultImageBase = 0x10000; }; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 73208f932031..7b927a434e36 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -49,7 +49,7 @@ namespace { // AArch64 long range Thunks class AArch64ABSLongThunk final : public Thunk { public: - AArch64ABSLongThunk(Symbol &dest) : Thunk(dest) {} + AArch64ABSLongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -57,7 +57,7 @@ public: class AArch64ADRPThunk final : public Thunk { public: - AArch64ADRPThunk(Symbol &dest) : Thunk(dest) {} + AArch64ADRPThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} uint32_t size() override { return 12; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -73,7 +73,7 @@ public: // if the target is in range, otherwise it creates a long thunk. class ARMThunk : public Thunk { public: - ARMThunk(Symbol &dest) : Thunk(dest) {} + ARMThunk(Symbol &dest) : Thunk(dest, 0) {} bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 4 : sizeLong(); } @@ -103,7 +103,7 @@ private: // which has a range of 16MB. class ThumbThunk : public Thunk { public: - ThumbThunk(Symbol &dest) : Thunk(dest) { alignment = 2; } + ThumbThunk(Symbol &dest) : Thunk(dest, 0) { alignment = 2; } bool getMayUseShortThunk(); uint32_t size() override { return getMayUseShortThunk() ? 
4 : sizeLong(); } @@ -209,7 +209,7 @@ public: // MIPS LA25 thunk class MipsThunk final : public Thunk { public: - MipsThunk(Symbol &dest) : Thunk(dest) {} + MipsThunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; @@ -220,7 +220,7 @@ public: // microMIPS R2-R5 LA25 thunk class MicroMipsThunk final : public Thunk { public: - MicroMipsThunk(Symbol &dest) : Thunk(dest) {} + MicroMipsThunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 14; } void writeTo(uint8_t *buf) override; @@ -231,7 +231,7 @@ public: // microMIPS R6 LA25 thunk class MicroMipsR6Thunk final : public Thunk { public: - MicroMipsR6Thunk(Symbol &dest) : Thunk(dest) {} + MicroMipsR6Thunk(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 12; } void writeTo(uint8_t *buf) override; @@ -241,8 +241,11 @@ public: class PPC32PltCallStub final : public Thunk { public: - PPC32PltCallStub(const InputSection &isec, const Relocation &rel, Symbol &dest) - : Thunk(dest), addend(rel.type == R_PPC_PLTREL24 ? rel.addend : 0), + // For R_PPC_PLTREL24, Thunk::addend records the addend which will be used to + // decide the offsets in the call stub. + PPC32PltCallStub(const InputSection &isec, const Relocation &rel, + Symbol &dest) + : Thunk(dest, rel.type == R_PPC_PLTREL24 ? rel.addend : 0), file(isec.file) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; @@ -250,10 +253,6 @@ public: bool isCompatibleWith(const InputSection &isec, const Relocation &rel) const override; private: - // For R_PPC_PLTREL24, this records the addend, which will be used to decide - // the offsets in the call stub. - uint32_t addend; - // Records the call site of the call stub. const InputFile *file; }; @@ -265,10 +264,10 @@ private: // 2) Loading the target functions address from the procedure linkage table into // r12 for use by the target functions global entry point, and into the count // register. 
-// 3) Transfering control to the target function through an indirect branch. +// 3) Transferring control to the target function through an indirect branch. class PPC64PltCallStub final : public Thunk { public: - PPC64PltCallStub(Symbol &dest) : Thunk(dest) {} + PPC64PltCallStub(Symbol &dest) : Thunk(dest, 0) {} uint32_t size() override { return 20; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -289,29 +288,29 @@ public: void addSymbols(ThunkSection &isec) override; protected: - PPC64LongBranchThunk(Symbol &dest) : Thunk(dest) {} + PPC64LongBranchThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} }; class PPC64PILongBranchThunk final : public PPC64LongBranchThunk { public: - PPC64PILongBranchThunk(Symbol &dest) : PPC64LongBranchThunk(dest) { + PPC64PILongBranchThunk(Symbol &dest, int64_t addend) + : PPC64LongBranchThunk(dest, addend) { assert(!dest.isPreemptible); - if (dest.isInPPC64Branchlt()) - return; - - in.ppc64LongBranchTarget->addEntry(dest); - mainPart->relaDyn->addReloc( - {target->relativeRel, in.ppc64LongBranchTarget, - dest.getPPC64LongBranchOffset(), true, &dest, - getPPC64GlobalEntryToLocalEntryOffset(dest.stOther)}); + if (Optional<uint32_t> index = + in.ppc64LongBranchTarget->addEntry(&dest, addend)) { + mainPart->relaDyn->addReloc( + {target->relativeRel, in.ppc64LongBranchTarget, *index * UINT64_C(8), + true, &dest, + addend + getPPC64GlobalEntryToLocalEntryOffset(dest.stOther)}); + } } }; class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk { public: - PPC64PDLongBranchThunk(Symbol &dest) : PPC64LongBranchThunk(dest) { - if (!dest.isInPPC64Branchlt()) - in.ppc64LongBranchTarget->addEntry(dest); + PPC64PDLongBranchThunk(Symbol &dest, int64_t addend) + : PPC64LongBranchThunk(dest, addend) { + in.ppc64LongBranchTarget->addEntry(&dest, addend); } }; @@ -332,8 +331,8 @@ void Thunk::setOffset(uint64_t newOffset) { // AArch64 long range Thunks -static uint64_t getAArch64ThunkDestVA(const 
Symbol &s) { - uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA(); +static uint64_t getAArch64ThunkDestVA(const Symbol &s, int64_t a) { + uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA(a); return v; } @@ -344,7 +343,7 @@ void AArch64ABSLongThunk::writeTo(uint8_t *buf) { 0x00, 0x00, 0x00, 0x00, // L0: .xword S 0x00, 0x00, 0x00, 0x00, }; - uint64_t s = getAArch64ThunkDestVA(destination); + uint64_t s = getAArch64ThunkDestVA(destination, addend); memcpy(buf, data, sizeof(data)); target->relocateOne(buf + 8, R_AARCH64_ABS64, s); } @@ -367,7 +366,7 @@ void AArch64ADRPThunk::writeTo(uint8_t *buf) { 0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest) 0x00, 0x02, 0x1f, 0xd6, // br x16 }; - uint64_t s = getAArch64ThunkDestVA(destination); + uint64_t s = getAArch64ThunkDestVA(destination, addend); uint64_t p = getThunkTargetSym()->getVA(); memcpy(buf, data, sizeof(data)); target->relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, @@ -708,13 +707,13 @@ InputSection *MicroMipsR6Thunk::getTargetInputSection() const { return dyn_cast<InputSection>(dr.section); } -void PPC32PltCallStub::writeTo(uint8_t *buf) { +void writePPC32PltCallStub(uint8_t *buf, uint64_t gotPltVA, + const InputFile *file, int64_t addend) { if (!config->isPic) { - uint64_t va = destination.getGotPltVA(); - write32(buf + 0, 0x3d600000 | (va + 0x8000) >> 16); // lis r11,ha - write32(buf + 4, 0x816b0000 | (uint16_t)va); // lwz r11,l(r11) - write32(buf + 8, 0x7d6903a6); // mtctr r11 - write32(buf + 12, 0x4e800420); // bctr + write32(buf + 0, 0x3d600000 | (gotPltVA + 0x8000) >> 16); // lis r11,ha + write32(buf + 4, 0x816b0000 | (uint16_t)gotPltVA); // lwz r11,l(r11) + write32(buf + 8, 0x7d6903a6); // mtctr r11 + write32(buf + 12, 0x4e800420); // bctr return; } uint32_t offset; @@ -722,12 +721,12 @@ void PPC32PltCallStub::writeTo(uint8_t *buf) { // The stub loads an address relative to r30 (.got2+Addend). Addend is // almost always 0x8000. 
The address of .got2 is different in another object // file, so a stub cannot be shared. - offset = destination.getGotPltVA() - (in.ppc32Got2->getParent()->getVA() + - file->ppc32Got2OutSecOff + addend); + offset = gotPltVA - (in.ppc32Got2->getParent()->getVA() + + file->ppc32Got2OutSecOff + addend); } else { // The stub loads an address relative to _GLOBAL_OFFSET_TABLE_ (which is // currently the address of .got). - offset = destination.getGotPltVA() - in.got->getVA(); + offset = gotPltVA - in.got->getVA(); } uint16_t ha = (offset + 0x8000) >> 16, l = (uint16_t)offset; if (ha == 0) { @@ -743,6 +742,10 @@ void PPC32PltCallStub::writeTo(uint8_t *buf) { } } +void PPC32PltCallStub::writeTo(uint8_t *buf) { + writePPC32PltCallStub(buf, destination.getGotPltVA(), file, addend); +} + void PPC32PltCallStub::addSymbols(ThunkSection &isec) { std::string buf; raw_string_ostream os(buf); @@ -762,7 +765,7 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec, return !config->isPic || (isec.file == file && rel.addend == addend); } -static void writePPCLoadAndBranch(uint8_t *buf, int64_t offset) { +void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset) { uint16_t offHa = (offset + 0x8000) >> 16; uint16_t offLo = offset & 0xffff; @@ -776,18 +779,20 @@ void PPC64PltCallStub::writeTo(uint8_t *buf) { int64_t offset = destination.getGotPltVA() - getPPC64TocBase(); // Save the TOC pointer to the save-slot reserved in the call frame. 
write32(buf + 0, 0xf8410018); // std r2,24(r1) - writePPCLoadAndBranch(buf + 4, offset); + writePPC64LoadAndBranch(buf + 4, offset); } void PPC64PltCallStub::addSymbols(ThunkSection &isec) { Defined *s = addSymbol(saver.save("__plt_" + destination.getName()), STT_FUNC, 0, isec); s->needsTocRestore = true; + s->file = destination.file; } void PPC64LongBranchThunk::writeTo(uint8_t *buf) { - int64_t offset = destination.getPPC64LongBranchTableVA() - getPPC64TocBase(); - writePPCLoadAndBranch(buf, offset); + int64_t offset = in.ppc64LongBranchTarget->getEntryVA(&destination, addend) - + getPPC64TocBase(); + writePPC64LoadAndBranch(buf, offset); } void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) { @@ -795,16 +800,16 @@ void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) { isec); } -Thunk::Thunk(Symbol &d) : destination(d), offset(0) {} +Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {} Thunk::~Thunk() = default; -static Thunk *addThunkAArch64(RelType type, Symbol &s) { +static Thunk *addThunkAArch64(RelType type, Symbol &s, int64_t a) { if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) fatal("unrecognized relocation type"); if (config->picThunk) - return make<AArch64ADRPThunk>(s); - return make<AArch64ABSLongThunk>(s); + return make<AArch64ADRPThunk>(s, a); + return make<AArch64ABSLongThunk>(s, a); } // Creates a thunk for Thumb-ARM interworking. 
@@ -895,28 +900,30 @@ static Thunk *addThunkMips(RelType type, Symbol &s) { return make<MipsThunk>(s); } -static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, Symbol &s) { +static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, + Symbol &s) { assert((rel.type == R_PPC_REL24 || rel.type == R_PPC_PLTREL24) && "unexpected relocation type for thunk"); return make<PPC32PltCallStub>(isec, rel, s); } -static Thunk *addThunkPPC64(RelType type, Symbol &s) { +static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) { assert(type == R_PPC64_REL24 && "unexpected relocation type for thunk"); if (s.isInPlt()) return make<PPC64PltCallStub>(s); if (config->picThunk) - return make<PPC64PILongBranchThunk>(s); + return make<PPC64PILongBranchThunk>(s, a); - return make<PPC64PDLongBranchThunk>(s); + return make<PPC64PDLongBranchThunk>(s, a); } Thunk *addThunk(const InputSection &isec, Relocation &rel) { Symbol &s = *rel.sym; + int64_t a = rel.addend; if (config->emachine == EM_AARCH64) - return addThunkAArch64(rel.type, s); + return addThunkAArch64(rel.type, s, a); if (config->emachine == EM_ARM) return addThunkArm(rel.type, s); @@ -928,7 +935,7 @@ Thunk *addThunk(const InputSection &isec, Relocation &rel) { return addThunkPPC32(isec, rel, s); if (config->emachine == EM_PPC64) - return addThunkPPC64(rel.type, s); + return addThunkPPC64(rel.type, s, a); llvm_unreachable("add Thunk only supported for ARM, Mips and PowerPC"); } diff --git a/lld/ELF/Thunks.h b/lld/ELF/Thunks.h index 2d27ee5f6c38..a8575b4cdb59 100644 --- a/lld/ELF/Thunks.h +++ b/lld/ELF/Thunks.h @@ -14,6 +14,7 @@ namespace lld { namespace elf { class Defined; +class InputFile; class Symbol; class ThunkSection; // Class to describe an instance of a Thunk. 
@@ -27,7 +28,7 @@ class ThunkSection; // Thunks are assigned to synthetic ThunkSections class Thunk { public: - Thunk(Symbol &destination); + Thunk(Symbol &destination, int64_t addend); virtual ~Thunk(); virtual uint32_t size() = 0; @@ -55,11 +56,12 @@ public: Defined *getThunkTargetSym() const { return syms[0]; } - // The alignment requirement for this Thunk, defaults to the size of the - // typical code section alignment. Symbol &destination; + int64_t addend; llvm::SmallVector<Defined *, 3> syms; uint64_t offset = 0; + // The alignment requirement for this Thunk, defaults to the size of the + // typical code section alignment. uint32_t alignment = 4; }; @@ -67,6 +69,10 @@ public: // ThunkSection. Thunk *addThunk(const InputSection &isec, Relocation &rel); +void writePPC32PltCallStub(uint8_t *buf, uint64_t gotPltVA, + const InputFile *file, int64_t addend); +void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset); + } // namespace elf } // namespace lld diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index dc0f9254596a..6373044d8804 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -135,7 +135,7 @@ StringRef getOutputSectionName(const InputSectionBase *s) { } static bool needsInterpSection() { - return !sharedFiles.empty() && !config->dynamicLinker.empty() && + return !config->shared && !config->dynamicLinker.empty() && script->needsInterpSection(); } @@ -283,7 +283,7 @@ void addReservedSymbols() { // different in different DSOs, so we chose the start address of the DSO. addOptionalRegular("__dso_handle", Out::elfHeader, 0, STV_HIDDEN); - // If linker script do layout we do not need to create any standart symbols. + // If linker script do layout we do not need to create any standard symbols. 
if (script->hasSectionsCommand) return; @@ -515,9 +515,15 @@ template <class ELFT> void createSyntheticSections() { /*sort=*/false); add(in.relaIplt); - in.plt = make<PltSection>(false); + if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && + (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { + in.ibtPlt = make<IBTPltSection>(); + add(in.ibtPlt); + } + + in.plt = make<PltSection>(); add(in.plt); - in.iplt = make<PltSection>(true); + in.iplt = make<IpltSection>(); add(in.iplt); if (config->andFeatures) @@ -1238,10 +1244,9 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() { // We want both global and local symbols. We get the global ones from the // symbol table and iterate the object files for the local ones. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (!sym->isLazy()) addSym(*sym); - }); for (InputFile *file : objectFiles) for (Symbol *sym : file->getSymbols()) @@ -1637,37 +1642,6 @@ static void removeUnusedSyntheticSections() { } } -// Returns true if a symbol can be replaced at load-time by a symbol -// with the same name defined in other ELF executable or DSO. -static bool computeIsPreemptible(const Symbol &b) { - assert(!b.isLocal()); - - // Only symbols that appear in dynsym can be preempted. - if (!b.includeInDynsym()) - return false; - - // Only default visibility symbols can be preempted. - if (b.visibility != STV_DEFAULT) - return false; - - // At this point copy relocations have not been created yet, so any - // symbol that is not defined locally is preemptible. - if (!b.isDefined()) - return true; - - if (!config->shared) - return false; - - // If the dynamic list is present, it specifies preemptable symbols in a DSO. - if (config->hasDynamicList) - return b.inDynamicList; - - // -Bsymbolic means that definitions are not preempted. 
- if (config->bsymbolic || (config->bsymbolicFunctions && b.isFunc())) - return false; - return true; -} - // Create output section objects and add them to OutputSections. template <class ELFT> void Writer<ELFT>::finalizeSections() { Out::preinitArray = findSection(".preinit_array"); @@ -1734,8 +1708,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { for (Partition &part : partitions) finalizeSynthetic(part.ehFrame); - symtab->forEachSymbol( - [](Symbol *s) { s->isPreemptible = computeIsPreemptible(*s); }); + for (Symbol *sym : symtab->symbols()) + sym->isPreemptible = computeIsPreemptible(*sym); // Change values of linker-script-defined symbols from placeholders (assigned // by declareSymbols) to actual definitions. @@ -1757,7 +1731,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { if (!config->allowShlibUndefined) { // Error on undefined symbols in a shared object, if all of its DT_NEEDED - // entires are seen. These cases would otherwise lead to runtime errors + // entries are seen. These cases would otherwise lead to runtime errors // reported by the dynamic linker. // // ld.bfd traces all DT_NEEDED to emulate the logic of the dynamic linker to @@ -1769,19 +1743,18 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { return symtab->soNames.count(needed); }); - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->isUndefined() && !sym->isWeak()) if (auto *f = dyn_cast_or_null<SharedFile>(sym->file)) if (f->allNeededIsKnown) error(toString(f) + ": undefined reference to " + toString(*sym)); - }); } // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. 
- symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { if (!includeInSymtab(*sym)) - return; + continue; if (in.symTab) in.symTab->addSymbol(sym); @@ -1791,7 +1764,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { if (file->isNeeded && !sym->isUndefined()) addVerneed(sym); } - }); + } // We also need to scan the dynamic relocation tables of the other partitions // and add any referenced symbols to the partition's dynsym. @@ -1979,7 +1952,7 @@ template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { // program text is above 2 GiB. We use the address of the .text // section instead to prevent that failure. // - // In a rare sitaution, .text section may not exist. If that's the + // In rare situations, the .text section may not exist. If that's the // case, use the image base address as a last resort. OutputSection *Default = findSection(".text"); if (!Default) @@ -2085,7 +2058,7 @@ std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs(Partition &part) { } // PT_GNU_RELRO includes all sections that should be marked as - // read-only by dynamic linker after proccessing relocations. + // read-only by dynamic linker after processing relocations. // Current dynamic loaders only support one PT_GNU_RELRO PHDR, give // an error message if more than one PT_GNU_RELRO PHDR is required. PhdrEntry *relRo = make<PhdrEntry>(PT_GNU_RELRO, PF_R); @@ -2172,14 +2145,16 @@ std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs(Partition &part) { if (OutputSection *cmd = findSection(".openbsd.randomdata", partNo)) addHdr(PT_OPENBSD_RANDOMIZE, cmd->getPhdrFlags())->add(cmd); - // PT_GNU_STACK is a special section to tell the loader to make the - // pages for the stack non-executable. If you really want an executable - // stack, you can pass -z execstack, but that's not recommended for - // security reasons. 
- unsigned perm = PF_R | PF_W; - if (config->zExecstack) - perm |= PF_X; - addHdr(PT_GNU_STACK, perm)->p_memsz = config->zStackSize; + if (config->zGnustack != GnuStackKind::None) { + // PT_GNU_STACK is a special section to tell the loader to make the + // pages for the stack non-executable. If you really want an executable + // stack, you can pass -z execstack, but that's not recommended for + // security reasons. + unsigned perm = PF_R | PF_W; + if (config->zGnustack == GnuStackKind::Exec) + perm |= PF_X; + addHdr(PT_GNU_STACK, perm)->p_memsz = config->zStackSize; + } // PT_OPENBSD_WXNEEDED is a OpenBSD-specific header to mark the executable // is expected to perform W^X violations, such as calling mprotect(2) or @@ -2188,6 +2163,9 @@ std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs(Partition &part) { if (config->zWxneeded) addHdr(PT_OPENBSD_WXNEEDED, PF_X); + if (OutputSection *cmd = findSection(".note.gnu.property", partNo)) + addHdr(PT_GNU_PROPERTY, PF_R)->add(cmd); + // Create one PT_NOTE per a group of contiguous SHT_NOTE sections with the // same alignment. PhdrEntry *note = nullptr; @@ -2423,7 +2401,7 @@ struct SectionOffset { } // namespace // Check whether sections overlap for a specific address range (file offsets, -// load and virtual adresses). +// load and virtual addresses). static void checkOverlap(StringRef name, std::vector<SectionOffset> §ions, bool isVirtualAddr) { llvm::sort(sections, [=](const SectionOffset &a, const SectionOffset &b) { @@ -2593,7 +2571,9 @@ template <class ELFT> void Writer<ELFT>::openFile() { unlinkAsync(config->outputFile); unsigned flags = 0; if (!config->relocatable) - flags = FileOutputBuffer::F_executable; + flags |= FileOutputBuffer::F_executable; + if (!config->mmapOutputFile) + flags |= FileOutputBuffer::F_no_mmap; Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = FileOutputBuffer::create(config->outputFile, fileSize, flags); |