diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:08:33 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:08:33 +0000 |
commit | 20d35e67e67f106f617c939725101223211659f0 (patch) | |
tree | 64eb963cbf5ba58765e0a6b64a440965d66a7a4d /COFF | |
parent | ae1a339de31cf4065777531959a11e55a2e5fa00 (diff) | |
download | src-20d35e67e67f106f617c939725101223211659f0.tar.gz src-20d35e67e67f106f617c939725101223211659f0.zip |
Vendor import of lld trunk r338150:vendor/lld/lld-trunk-r338150
Notes
Notes:
svn path=/vendor/lld/dist/; revision=336821
svn path=/vendor/lld/lld-trunk-r338150/; revision=336822; tag=vendor/lld/lld-trunk-r338150
Diffstat (limited to 'COFF')
-rw-r--r-- | COFF/CMakeLists.txt | 1 | ||||
-rw-r--r-- | COFF/Chunks.cpp | 175 | ||||
-rw-r--r-- | COFF/Chunks.h | 92 | ||||
-rw-r--r-- | COFF/Config.h | 30 | ||||
-rw-r--r-- | COFF/DLL.cpp | 2 | ||||
-rw-r--r-- | COFF/DLL.h | 5 | ||||
-rw-r--r-- | COFF/Driver.cpp | 430 | ||||
-rw-r--r-- | COFF/Driver.h | 21 | ||||
-rw-r--r-- | COFF/DriverUtils.cpp | 144 | ||||
-rw-r--r-- | COFF/ICF.cpp | 90 | ||||
-rw-r--r-- | COFF/ICF.h (renamed from COFF/Strings.h) | 21 | ||||
-rw-r--r-- | COFF/InputFiles.cpp | 92 | ||||
-rw-r--r-- | COFF/InputFiles.h | 51 | ||||
-rw-r--r-- | COFF/LTO.cpp | 70 | ||||
-rw-r--r-- | COFF/LTO.h | 2 | ||||
-rw-r--r-- | COFF/MapFile.cpp | 12 | ||||
-rw-r--r-- | COFF/MarkLive.cpp | 7 | ||||
-rw-r--r-- | COFF/MarkLive.h | 24 | ||||
-rw-r--r-- | COFF/MinGW.cpp | 2 | ||||
-rw-r--r-- | COFF/Options.td | 30 | ||||
-rw-r--r-- | COFF/PDB.cpp | 577 | ||||
-rw-r--r-- | COFF/PDB.h | 4 | ||||
-rw-r--r-- | COFF/Strings.cpp | 35 | ||||
-rw-r--r-- | COFF/SymbolTable.cpp | 96 | ||||
-rw-r--r-- | COFF/SymbolTable.h | 6 | ||||
-rw-r--r-- | COFF/Symbols.cpp | 8 | ||||
-rw-r--r-- | COFF/Symbols.h | 11 | ||||
-rw-r--r-- | COFF/Writer.cpp | 825 | ||||
-rw-r--r-- | COFF/Writer.h | 16 |
29 files changed, 2123 insertions, 756 deletions
diff --git a/COFF/CMakeLists.txt b/COFF/CMakeLists.txt index 4610ccc880fd..bb241e788c19 100644 --- a/COFF/CMakeLists.txt +++ b/COFF/CMakeLists.txt @@ -18,7 +18,6 @@ add_lld_library(lldCOFF MarkLive.cpp MinGW.cpp PDB.cpp - Strings.cpp SymbolTable.cpp Symbols.cpp Writer.cpp diff --git a/COFF/Chunks.cpp b/COFF/Chunks.cpp index 557b02654426..412ff783222b 100644 --- a/COFF/Chunks.cpp +++ b/COFF/Chunks.cpp @@ -31,8 +31,7 @@ namespace coff { SectionChunk::SectionChunk(ObjFile *F, const coff_section *H) : Chunk(SectionKind), Repl(this), Header(H), File(F), - Relocs(File->getCOFFObj()->getRelocations(Header)), - NumRelocs(std::distance(Relocs.begin(), Relocs.end())) { + Relocs(File->getCOFFObj()->getRelocations(Header)) { // Initialize SectionName. File->getCOFFObj()->getSectionName(Header, SectionName); @@ -51,13 +50,21 @@ static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); } static void or16(uint8_t *P, uint16_t V) { write16le(P, read16le(P) | V); } static void or32(uint8_t *P, uint32_t V) { write32le(P, read32le(P) | V); } +// Verify that given sections are appropriate targets for SECREL +// relocations. This check is relaxed because unfortunately debug +// sections have section-relative relocations against absolute symbols. +static bool checkSecRel(const SectionChunk *Sec, OutputSection *OS) { + if (OS) + return true; + if (Sec->isCodeView()) + return false; + fatal("SECREL relocation cannot be applied to absolute symbols"); +} + static void applySecRel(const SectionChunk *Sec, uint8_t *Off, OutputSection *OS, uint64_t S) { - if (!OS) { - if (Sec->isCodeView()) - return; - fatal("SECREL relocation cannot be applied to absolute symbols"); - } + if (!checkSecRel(Sec, OS)) + return; uint64_t SecRel = S - OS->getRVA(); if (SecRel > UINT32_MAX) { error("overflow in SECREL relocation in section: " + Sec->getSectionName()); @@ -67,10 +74,13 @@ static void applySecRel(const SectionChunk *Sec, uint8_t *Off, } static void applySecIdx(uint8_t *Off, OutputSection *OS) { - // If we have no output section, this must be an absolute symbol. Use the - // sentinel absolute symbol section index. - uint16_t SecIdx = OS ? OS->SectionIndex : DefinedAbsolute::OutputSectionIndex; - add16(Off, SecIdx); + // Absolute symbol doesn't have section index, but section index relocation + // against absolute symbol should be resolved to one plus the last output + // section index. This is required for compatibility with MSVC. + if (OS) + add16(Off, OS->SectionIndex); + else + add16(Off, DefinedAbsolute::NumOutputSections + 1); } void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, @@ -88,7 +98,8 @@ void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, case IMAGE_REL_AMD64_SECTION: applySecIdx(Off, OS); break; case IMAGE_REL_AMD64_SECREL: applySecRel(this, Off, OS, S); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type)); + fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + toString(File)); } } @@ -102,7 +113,8 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS, case IMAGE_REL_I386_SECTION: applySecIdx(Off, OS); break; case IMAGE_REL_I386_SECREL: applySecRel(this, Off, OS, S); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type)); + fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + toString(File)); } } @@ -112,11 +124,10 @@ static void applyMOV(uint8_t *Off, uint16_t V) { } static uint16_t readMOV(uint8_t *Off) { - uint16_t Opcode1 = read16le(Off); - uint16_t Opcode2 = read16le(Off + 2); - uint16_t Imm = (Opcode2 & 0x00ff) | ((Opcode2 >> 4) & 0x0700); - Imm |= ((Opcode1 << 1) & 0x0800) | ((Opcode1 & 0x000f) << 12); - return Imm; + uint16_t Op1 = read16le(Off); + uint16_t Op2 = read16le(Off + 2); + return (Op2 & 0x00ff) | ((Op2 >> 4) & 0x0700) | ((Op1 << 1) & 0x0800) | + ((Op1 & 0x000f) << 12); } void applyMOV32T(uint8_t *Off, uint32_t V) { @@ -153,7 +164,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, uint64_t P) const { // Pointer to thumb code must have the LSB set. uint64_t SX = S; - if (OS && (OS->getPermissions() & IMAGE_SCN_MEM_EXECUTE)) + if (OS && (OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE)) SX |= 1; switch (Type) { case IMAGE_REL_ARM_ADDR32: add32(Off, SX + Config->ImageBase); break; @@ -165,18 +176,19 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, case IMAGE_REL_ARM_SECTION: applySecIdx(Off, OS); break; case IMAGE_REL_ARM_SECREL: applySecRel(this, Off, OS, S); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type)); + fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + toString(File)); } } // Interpret the existing immediate value as a byte offset to the // target symbol, then update the instruction with the immediate as // the page offset from the current instruction to the target. -static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P) { +static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) { uint32_t Orig = read32le(Off); uint64_t Imm = ((Orig >> 29) & 0x3) | ((Orig >> 3) & 0x1FFFFC); S += Imm; - Imm = (S >> 12) - (P >> 12); + Imm = (S >> Shift) - (P >> Shift); uint32_t ImmLo = (Imm & 0x3) << 29; uint32_t ImmHi = (Imm & 0x1FFFFC) << 3; uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3); @@ -213,19 +225,70 @@ static void applyArm64Ldr(uint8_t *Off, uint64_t Imm) { applyArm64Imm(Off, Imm >> Size, Size); } +static void applySecRelLow12A(const SectionChunk *Sec, uint8_t *Off, + OutputSection *OS, uint64_t S) { + if (checkSecRel(Sec, OS)) + applyArm64Imm(Off, (S - OS->getRVA()) & 0xfff, 0); +} + +static void applySecRelHigh12A(const SectionChunk *Sec, uint8_t *Off, + OutputSection *OS, uint64_t S) { + if (!checkSecRel(Sec, OS)) + return; + uint64_t SecRel = (S - OS->getRVA()) >> 12; + if (0xfff < SecRel) { + error("overflow in SECREL_HIGH12A relocation in section: " + + Sec->getSectionName()); + return; + } + applyArm64Imm(Off, SecRel & 0xfff, 0); +} + +static void applySecRelLdr(const SectionChunk *Sec, uint8_t *Off, + OutputSection *OS, uint64_t S) { + if (checkSecRel(Sec, OS)) + applyArm64Ldr(Off, (S - OS->getRVA()) & 0xfff); +} + +static void applyArm64Branch26(uint8_t *Off, int64_t V) { + if (!isInt<28>(V)) + fatal("relocation out of range"); + or32(Off, (V & 0x0FFFFFFC) >> 2); +} + +static void applyArm64Branch19(uint8_t *Off, int64_t V) { + if (!isInt<21>(V)) + fatal("relocation out of range"); + or32(Off, (V & 0x001FFFFC) << 3); +} + +static void applyArm64Branch14(uint8_t *Off, int64_t V) { + if (!isInt<16>(V)) + fatal("relocation out of range"); + or32(Off, (V & 0x0000FFFC) << 3); +} + void SectionChunk::applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, uint64_t P) const { switch (Type) { - case IMAGE_REL_ARM64_PAGEBASE_REL21: applyArm64Addr(Off, S, P); break; + case IMAGE_REL_ARM64_PAGEBASE_REL21: applyArm64Addr(Off, S, P, 12); break; + case IMAGE_REL_ARM64_REL21: applyArm64Addr(Off, S, P, 0); break; case IMAGE_REL_ARM64_PAGEOFFSET_12A: applyArm64Imm(Off, S & 0xfff, 0); break; case IMAGE_REL_ARM64_PAGEOFFSET_12L: applyArm64Ldr(Off, S & 0xfff); break; - case IMAGE_REL_ARM64_BRANCH26: or32(Off, ((S - P) & 0x0FFFFFFC) >> 2); break; + case IMAGE_REL_ARM64_BRANCH26: applyArm64Branch26(Off, S - P); break; + case IMAGE_REL_ARM64_BRANCH19: applyArm64Branch19(Off, S - P); break; + case IMAGE_REL_ARM64_BRANCH14: applyArm64Branch14(Off, S - P); break; case IMAGE_REL_ARM64_ADDR32: add32(Off, S + Config->ImageBase); break; case IMAGE_REL_ARM64_ADDR32NB: add32(Off, S); break; case IMAGE_REL_ARM64_ADDR64: add64(Off, S + Config->ImageBase); break; case IMAGE_REL_ARM64_SECREL: applySecRel(this, Off, OS, S); break; + case IMAGE_REL_ARM64_SECREL_LOW12A: applySecRelLow12A(this, Off, OS, S); break; + case IMAGE_REL_ARM64_SECREL_HIGH12A: applySecRelHigh12A(this, Off, OS, S); break; + case IMAGE_REL_ARM64_SECREL_LOW12L: applySecRelLdr(this, Off, OS, S); break; + case IMAGE_REL_ARM64_SECTION: applySecIdx(Off, OS); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type)); + fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + toString(File)); } } @@ -234,7 +297,8 @@ void SectionChunk::writeTo(uint8_t *Buf) const { return; // Copy section contents from source object file to output file. ArrayRef<uint8_t> A = getContents(); - memcpy(Buf + OutputSectionOff, A.data(), A.size()); + if (!A.empty()) + memcpy(Buf + OutputSectionOff, A.data(), A.size()); // Apply relocations. size_t InputSize = getSize(); @@ -350,8 +414,8 @@ bool SectionChunk::hasData() const { return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA); } -uint32_t SectionChunk::getPermissions() const { - return Header->Characteristics & PermMask; +uint32_t SectionChunk::getOutputCharacteristics() const { + return Header->Characteristics & (PermMask | TypeMask); } bool SectionChunk::isCOMDAT() const { @@ -378,6 +442,7 @@ ArrayRef<uint8_t> SectionChunk::getContents() const { } void SectionChunk::replace(SectionChunk *Other) { + Alignment = std::max(Alignment, Other->Alignment); Other->Repl = Repl; Other->Live = false; } @@ -388,7 +453,7 @@ CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) { Alignment = std::min(uint64_t(32), PowerOf2Ceil(Sym.getValue())); } -uint32_t CommonChunk::getPermissions() const { +uint32_t CommonChunk::getOutputCharacteristics() const { return IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE; } @@ -433,7 +498,7 @@ void ImportThunkChunkARM::writeTo(uint8_t *Buf) const { void ImportThunkChunkARM64::writeTo(uint8_t *Buf) const { int64_t Off = ImpSymbol->getRVA() & 0xfff; memcpy(Buf + OutputSectionOff, ImportThunkARM64, sizeof(ImportThunkARM64)); - applyArm64Addr(Buf + OutputSectionOff, ImpSymbol->getRVA(), RVA); + applyArm64Addr(Buf + OutputSectionOff, ImpSymbol->getRVA(), RVA, 12); applyArm64Ldr(Buf + OutputSectionOff + 4, Off); } @@ -453,12 +518,14 @@ void LocalImportChunk::writeTo(uint8_t *Buf) const { } } -void SEHTableChunk::writeTo(uint8_t *Buf) const { +void RVATableChunk::writeTo(uint8_t *Buf) const { ulittle32_t *Begin = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff); size_t Cnt = 0; - for (Defined *D : Syms) - Begin[Cnt++] = D->getRVA(); + for (const ChunkAndOffset &CO : Syms) + Begin[Cnt++] = CO.InputChunk->getRVA() + CO.Offset; std::sort(Begin, Begin + Cnt); + assert(std::unique(Begin, Begin + Cnt) == Begin + Cnt && + "RVA tables should be de-duplicated"); } // Windows-specific. This class represents a block in .reloc section. @@ -531,5 +598,47 @@ uint8_t Baserel::getDefaultType() { } } +std::map<uint32_t, MergeChunk *> MergeChunk::Instances; + +MergeChunk::MergeChunk(uint32_t Alignment) + : Builder(StringTableBuilder::RAW, Alignment) { + this->Alignment = Alignment; +} + +void MergeChunk::addSection(SectionChunk *C) { + auto *&MC = Instances[C->Alignment]; + if (!MC) + MC = make<MergeChunk>(C->Alignment); + MC->Sections.push_back(C); +} + +void MergeChunk::finalizeContents() { + for (SectionChunk *C : Sections) + if (C->isLive()) + Builder.add(toStringRef(C->getContents())); + Builder.finalize(); + + for (SectionChunk *C : Sections) { + if (!C->isLive()) + continue; + size_t Off = Builder.getOffset(toStringRef(C->getContents())); + C->setOutputSection(Out); + C->setRVA(RVA + Off); + C->OutputSectionOff = OutputSectionOff + Off; + } +} + +uint32_t MergeChunk::getOutputCharacteristics() const { + return IMAGE_SCN_MEM_READ | IMAGE_SCN_CNT_INITIALIZED_DATA; +} + +size_t MergeChunk::getSize() const { + return Builder.getSize(); +} + +void MergeChunk::writeTo(uint8_t *Buf) const { + Builder.write(Buf + OutputSectionOff); +} + } // namespace coff } // namespace lld diff --git a/COFF/Chunks.h b/COFF/Chunks.h index 381527ee6ef2..9e896531bd9a 100644 --- a/COFF/Chunks.h +++ b/COFF/Chunks.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/COFF.h" #include <utility> #include <vector> @@ -37,9 +38,11 @@ class ObjFile; class OutputSection; class Symbol; -// Mask for section types (code, data, bss, disacardable, etc.) -// and permissions (writable, readable or executable). -const uint32_t PermMask = 0xFF0000F0; +// Mask for permissions (discardable, writable, readable, executable, etc). +const uint32_t PermMask = 0xFE000000; + +// Mask for section types (code, data, bss). +const uint32_t TypeMask = 0x000000E0; // A Chunk represents a chunk of data that will occupy space in the // output (if the resolver chose that). It may or may not be backed by @@ -60,6 +63,10 @@ public: // before calling this function. virtual void writeTo(uint8_t *Buf) const {} + // Called by the writer after an RVA is assigned, but before calling + // getSize(). + virtual void finalizeContents() {} + // The writer sets and uses the addresses. uint64_t getRVA() const { return RVA; } void setRVA(uint64_t V) { RVA = V; } @@ -70,7 +77,7 @@ public: virtual bool hasData() const { return true; } // Returns readable/writable/executable bits. - virtual uint32_t getPermissions() const { return 0; } + virtual uint32_t getOutputCharacteristics() const { return 0; } // Returns the section name if this is a section chunk. // It is illegal to call this function on non-section chunks. @@ -137,7 +144,7 @@ public: ArrayRef<uint8_t> getContents() const; void writeTo(uint8_t *Buf) const override; bool hasData() const override; - uint32_t getPermissions() const override; + uint32_t getOutputCharacteristics() const override; StringRef getSectionName() const override { return SectionName; } void getBaserels(std::vector<Baserel> *Res) override; bool isCOMDAT() const; @@ -208,11 +215,11 @@ public: // The COMDAT leader symbol if this is a COMDAT chunk. DefinedRegular *Sym = nullptr; + ArrayRef<coff_relocation> Relocs; + private: StringRef SectionName; std::vector<SectionChunk *> AssocChildren; - llvm::iterator_range<const coff_relocation *> Relocs; - size_t NumRelocs; // Used by the garbage collector. bool Live; @@ -222,13 +229,40 @@ private: uint32_t Class[2] = {0, 0}; }; +// This class is used to implement an lld-specific feature (not implemented in +// MSVC) that minimizes the output size by finding string literals sharing tail +// parts and merging them. +// +// If string tail merging is enabled and a section is identified as containing a +// string literal, it is added to a MergeChunk with an appropriate alignment. +// The MergeChunk then tail merges the strings using the StringTableBuilder +// class and assigns RVAs and section offsets to each of the member chunks based +// on the offsets assigned by the StringTableBuilder. +class MergeChunk : public Chunk { +public: + MergeChunk(uint32_t Alignment); + static void addSection(SectionChunk *C); + void finalizeContents() override; + + uint32_t getOutputCharacteristics() const override; + StringRef getSectionName() const override { return ".rdata"; } + size_t getSize() const override; + void writeTo(uint8_t *Buf) const override; + + static std::map<uint32_t, MergeChunk *> Instances; + std::vector<SectionChunk *> Sections; + +private: + llvm::StringTableBuilder Builder; +}; + // A chunk for common symbols. Common chunks don't have actual data. class CommonChunk : public Chunk { public: CommonChunk(const COFFSymbolRef Sym); size_t getSize() const override { return Sym.getValue(); } bool hasData() const override { return false; } - uint32_t getPermissions() const override; + uint32_t getOutputCharacteristics() const override; StringRef getSectionName() const override { return ".bss"; } private: @@ -320,17 +354,41 @@ private: Defined *Sym; }; -// Windows-specific. -// A chunk for SEH table which contains RVAs of safe exception handler -// functions. x86-only. -class SEHTableChunk : public Chunk { +// Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and +// offset into the chunk. Order does not matter as the RVA table will be sorted +// later. +struct ChunkAndOffset { + Chunk *InputChunk; + uint32_t Offset; + + struct DenseMapInfo { + static ChunkAndOffset getEmptyKey() { + return {llvm::DenseMapInfo<Chunk *>::getEmptyKey(), 0}; + } + static ChunkAndOffset getTombstoneKey() { + return {llvm::DenseMapInfo<Chunk *>::getTombstoneKey(), 0}; + } + static unsigned getHashValue(const ChunkAndOffset &CO) { + return llvm::DenseMapInfo<std::pair<Chunk *, uint32_t>>::getHashValue( + {CO.InputChunk, CO.Offset}); + } + static bool isEqual(const ChunkAndOffset &LHS, const ChunkAndOffset &RHS) { + return LHS.InputChunk == RHS.InputChunk && LHS.Offset == RHS.Offset; + } + }; +}; + +using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>; + +// Table which contains symbol RVAs. Used for /safeseh and /guard:cf. +class RVATableChunk : public Chunk { public: - explicit SEHTableChunk(std::set<Defined *> S) : Syms(std::move(S)) {} + explicit RVATableChunk(SymbolRVASet S) : Syms(std::move(S)) {} size_t getSize() const override { return Syms.size() * 4; } void writeTo(uint8_t *Buf) const override; private: - std::set<Defined *> Syms; + SymbolRVASet Syms; }; // Windows-specific. @@ -362,4 +420,10 @@ void applyBranch24T(uint8_t *Off, int32_t V); } // namespace coff } // namespace lld +namespace llvm { +template <> +struct DenseMapInfo<lld::coff::ChunkAndOffset> + : lld::coff::ChunkAndOffset::DenseMapInfo {}; +} + #endif diff --git a/COFF/Config.h b/COFF/Config.h index 93bef23a97f0..3ae50b868333 100644 --- a/COFF/Config.h +++ b/COFF/Config.h @@ -10,6 +10,7 @@ #ifndef LLD_COFF_CONFIG_H #define LLD_COFF_CONFIG_H +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" #include "llvm/Support/CachePruning.h" @@ -71,6 +72,12 @@ enum class DebugType { Fixup = 0x4, /// Relocation Table }; +enum class GuardCFLevel { + Off, + NoLongJmp, // Emit gfids but no longjmp tables + Full, // Enable all protections. +}; + // Global configuration. struct Configuration { enum ManifestKind { SideBySide, Embed, No }; @@ -85,13 +92,19 @@ struct Configuration { std::string ImportName; bool DoGC = true; bool DoICF = true; + bool TailMerge; bool Relocatable = true; bool Force = false; bool Debug = false; bool DebugDwarf = false; bool DebugGHashes = false; + bool DebugSymtab = false; + bool ShowTiming = false; unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + std::vector<std::string> NatvisFiles; + llvm::SmallString<128> PDBAltPath; llvm::SmallString<128> PDBPath; + llvm::SmallString<128> PDBSourcePath; std::vector<llvm::StringRef> Argv; // Symbols in this set are considered as live by the garbage collector. @@ -110,15 +123,18 @@ struct Configuration { bool SaveTemps = false; + // /guard:cf + GuardCFLevel GuardCF = GuardCFLevel::Off; + // Used for SafeSEH. Symbol *SEHTable = nullptr; Symbol *SEHCount = nullptr; // Used for /opt:lldlto=N - unsigned LTOOptLevel = 2; + unsigned LTOO = 2; // Used for /opt:lldltojobs=N - unsigned LTOJobs = 0; + unsigned ThinLTOJobs = 0; // Used for /opt:lldltopartitions=N unsigned LTOPartitions = 1; @@ -152,6 +168,9 @@ struct Configuration { // Used for /alternatename. std::map<StringRef, StringRef> AlternateNames; + // Used for /order. + llvm::StringMap<int> Order; + // Used for /lldmap. std::string MapFile; @@ -164,7 +183,7 @@ struct Configuration { uint32_t MinorImageVersion = 0; uint32_t MajorOSVersion = 6; uint32_t MinorOSVersion = 0; - bool CanExitEarly = false; + uint32_t Timestamp = 0; bool DynamicBase = true; bool AllowBind = true; bool NxCompat = true; @@ -174,7 +193,12 @@ struct Configuration { bool HighEntropyVA = false; bool AppContainer = false; bool MinGW = false; + bool WarnMissingOrderSymbol = true; bool WarnLocallyDefinedImported = true; + bool Incremental = true; + bool IntegrityCheck = false; + bool KillAt = false; + bool Repro = false; }; extern Configuration *Config; diff --git a/COFF/DLL.cpp b/COFF/DLL.cpp index 195839139670..464abe8e0894 100644 --- a/COFF/DLL.cpp +++ b/COFF/DLL.cpp @@ -18,8 +18,8 @@ // //===----------------------------------------------------------------------===// -#include "Chunks.h" #include "DLL.h" +#include "Chunks.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" diff --git a/COFF/DLL.h b/COFF/DLL.h index ad312789edf1..c5d6e7c93abf 100644 --- a/COFF/DLL.h +++ b/COFF/DLL.h @@ -76,6 +76,11 @@ class EdataContents { public: EdataContents(); std::vector<Chunk *> Chunks; + + uint64_t getRVA() { return Chunks[0]->getRVA(); } + uint64_t getSize() { + return Chunks.back()->getRVA() + Chunks.back()->getSize() - getRVA(); + } }; } // namespace coff diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp index 1aaec355c7a5..eefdb48beadd 100644 --- a/COFF/Driver.cpp +++ b/COFF/Driver.cpp @@ -9,14 +9,18 @@ #include "Driver.h" #include "Config.h" +#include "ICF.h" #include "InputFiles.h" +#include "MarkLive.h" #include "MinGW.h" #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" +#include "lld/Common/Args.h" #include "lld/Common/Driver.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Timer.h" #include "lld/Common/Version.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringSwitch.h" @@ -35,9 +39,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include <algorithm> -#include <memory> - #include <future> +#include <memory> using namespace llvm; using namespace llvm::object; @@ -47,19 +50,20 @@ using llvm::sys::Process; namespace lld { namespace coff { +static Timer InputFileTimer("Input File Reading", Timer::root()); + Configuration *Config; LinkerDriver *Driver; bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) { - errorHandler().LogName = Args[0]; + errorHandler().LogName = sys::path::filename(Args[0]); errorHandler().ErrorOS = &Diag; errorHandler().ColorDiagnostics = Diag.has_colors(); errorHandler().ErrorLimitExceededMsg = "too many errors emitted, stopping now" - " (use /ERRORLIMIT:0 to see all errors)"; + " (use /errorlimit:0 to see all errors)"; + errorHandler().ExitEarly = CanExitEarly; Config = make<Configuration>(); - Config->Argv = {Args.begin(), Args.end()}; - Config->CanExitEarly = CanExitEarly; Symtab = make<SymbolTable>(); @@ -71,6 +75,9 @@ bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) { exitLld(errorCount() ? 1 : 0); freeArena(); + ObjFile::Instances.clear(); + ImportFile::Instances.clear(); + BitcodeFile::Instances.clear(); return !errorCount(); } @@ -92,7 +99,7 @@ typedef std::pair<std::unique_ptr<MemoryBuffer>, std::error_code> MBErrPair; // Create a std::future that opens and maps a file using the best strategy for // the host platform. static std::future<MBErrPair> createFutureForFile(std::string Path) { -#if LLVM_ON_WIN32 +#if _WIN32 // On Windows, file I/O is relatively slow so it is best to do this // asynchronously. auto Strategy = std::launch::async; @@ -100,7 +107,9 @@ static std::future<MBErrPair> createFutureForFile(std::string Path) { auto Strategy = std::launch::deferred; #endif return std::async(Strategy, [=]() { - auto MBOrErr = MemoryBuffer::getFile(Path); + auto MBOrErr = MemoryBuffer::getFile(Path, + /*FileSize*/ -1, + /*RequiresNullTerminator*/ false); if (!MBOrErr) return MBErrPair{nullptr, MBOrErr.getError()}; return MBErrPair{std::move(*MBOrErr), std::error_code()}; @@ -119,39 +128,46 @@ MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> MB) { void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB, bool WholeArchive) { + StringRef Filename = MB->getBufferIdentifier(); + MemoryBufferRef MBRef = takeBuffer(std::move(MB)); - FilePaths.push_back(MBRef.getBufferIdentifier()); + FilePaths.push_back(Filename); // File type is detected by contents, not by file extension. switch (identify_magic(MBRef.getBuffer())) { case file_magic::windows_resource: Resources.push_back(MBRef); break; - case file_magic::archive: if (WholeArchive) { std::unique_ptr<Archive> File = - CHECK(Archive::create(MBRef), - MBRef.getBufferIdentifier() + ": failed to parse archive"); + CHECK(Archive::create(MBRef), Filename + ": failed to parse archive"); for (MemoryBufferRef M : getArchiveMembers(File.get())) - addArchiveBuffer(M, "<whole-archive>", MBRef.getBufferIdentifier()); + addArchiveBuffer(M, "<whole-archive>", Filename); return; } Symtab->addFile(make<ArchiveFile>(MBRef)); break; - case file_magic::bitcode: Symtab->addFile(make<BitcodeFile>(MBRef)); break; - + case file_magic::coff_object: + case file_magic::coff_import_library: + Symtab->addFile(make<ObjFile>(MBRef)); + break; case file_magic::coff_cl_gl_object: - error(MBRef.getBufferIdentifier() + ": is not a native COFF file. " - "Recompile without /GL"); + error(Filename + ": is not a native COFF file. Recompile without /GL"); break; - + case file_magic::pecoff_executable: + if (Filename.endswith_lower(".dll")) { + error(Filename + ": bad file type. Did you specify a DLL instead of an " + "import library?"); + break; + } + LLVM_FALLTHROUGH; default: - Symtab->addFile(make<ObjFile>(MBRef)); + error(MBRef.getBufferIdentifier() + ": unknown file type"); break; } } @@ -227,7 +243,29 @@ static bool isDecorated(StringRef Sym) { void LinkerDriver::parseDirectives(StringRef S) { ArgParser Parser; // .drectve is always tokenized using Windows shell rules. - opt::InputArgList Args = Parser.parseDirectives(S); + // /EXPORT: option can appear too many times, processing in fastpath. + opt::InputArgList Args; + std::vector<StringRef> Exports; + std::tie(Args, Exports) = Parser.parseDirectives(S); + + for (StringRef E : Exports) { + // If a common header file contains dllexported function + // declarations, many object files may end up with having the + // same /EXPORT options. In order to save cost of parsing them, + // we dedup them first. + if (!DirectivesExports.insert(E).second) + continue; + + Export Exp = parseExport(E); + if (Config->Machine == I386 && Config->MinGW) { + if (!isDecorated(Exp.Name)) + Exp.Name = Saver.save("_" + Exp.Name); + if (!Exp.ExtName.empty() && !isDecorated(Exp.ExtName)) + Exp.ExtName = Saver.save("_" + Exp.ExtName); + } + Exp.Directives = true; + Config->Exports.push_back(Exp); + } for (auto *Arg : Args) { switch (Arg->getOption().getUnaliasedOption().getID()) { @@ -244,25 +282,6 @@ void LinkerDriver::parseDirectives(StringRef S) { case OPT_entry: Config->Entry = addUndefined(mangle(Arg->getValue())); break; - case OPT_export: { - // If a common header file contains dllexported function - // declarations, many object files may end up with having the - // same /EXPORT options. In order to save cost of parsing them, - // we dedup them first. - if (!DirectivesExports.insert(Arg->getValue()).second) - break; - - Export E = parseExport(Arg->getValue()); - if (Config->Machine == I386 && Config->MinGW) { - if (!isDecorated(E.Name)) - E.Name = Saver.save("_" + E.Name); - if (!E.ExtName.empty() && !isDecorated(E.ExtName)) - E.ExtName = Saver.save("_" + E.ExtName); - } - E.Directives = true; - Config->Exports.push_back(E); - break; - } case OPT_failifmismatch: checkFailIfMismatch(Arg->getValue()); break; @@ -315,13 +334,24 @@ StringRef LinkerDriver::doFindFile(StringRef Filename) { return Filename; } +static Optional<sys::fs::UniqueID> getUniqueID(StringRef Path) { + sys::fs::UniqueID Ret; + if (sys::fs::getUniqueID(Path, Ret)) + return None; + return Ret; +} + // Resolves a file path. This never returns the same path // (in that case, it returns None). Optional<StringRef> LinkerDriver::findFile(StringRef Filename) { StringRef Path = doFindFile(Filename); - bool Seen = !VisitedFiles.insert(Path.lower()).second; - if (Seen) - return None; + + if (Optional<sys::fs::UniqueID> ID = getUniqueID(Path)) { + bool Seen = !VisitedFiles.insert(*ID).second; + if (Seen) + return None; + } + if (Path.endswith_lower(".lib")) VisitedLibs.insert(sys::path::filename(Path)); return Path; @@ -344,11 +374,14 @@ Optional<StringRef> LinkerDriver::findLib(StringRef Filename) { return None; if (!VisitedLibs.insert(Filename.lower()).second) return None; + StringRef Path = doFindLib(Filename); if (Config->NoDefaultLibs.count(Path)) return None; - if (!VisitedFiles.insert(Path.lower()).second) - return None; + + if (Optional<sys::fs::UniqueID> ID = getUniqueID(Path)) + if (!VisitedFiles.insert(*ID).second) + return None; return Path; } @@ -383,7 +416,24 @@ StringRef LinkerDriver::mangle(StringRef Sym) { } // Windows specific -- find default entry point name. +// +// There are four different entry point functions for Windows executables, +// each of which corresponds to a user-defined "main" function. This function +// infers an entry point from a user-defined "main" function. StringRef LinkerDriver::findDefaultEntry() { + // As a special case, if /nodefaultlib is given, we directly look for an + // entry point. This is because, if no default library is linked, users + // need to define an entry point instead of a "main". + if (Config->NoDefaultLibAll) { + for (StringRef S : {"mainCRTStartup", "wmainCRTStartup", + "WinMainCRTStartup", "wWinMainCRTStartup"}) { + StringRef Entry = Symtab->findMangle(S); + if (!Entry.empty() && !isa<Undefined>(Symtab->find(Entry))) + return mangle(S); + } + return ""; + } + // User-defined main functions and their corresponding entry points. static const char *Entries[][2] = { {"main", "mainCRTStartup"}, @@ -533,10 +583,49 @@ static void createImportLibrary(bool AsLib) { Exports.push_back(E2); } - auto E = writeImportLibrary(getImportName(AsLib), getImplibPath(), Exports, - Config->Machine, false); - handleAllErrors(std::move(E), - [&](ErrorInfoBase &EIB) { error(EIB.message()); }); + auto HandleError = [](Error &&E) { + handleAllErrors(std::move(E), + [](ErrorInfoBase &EIB) { error(EIB.message()); }); + }; + std::string LibName = getImportName(AsLib); + std::string Path = getImplibPath(); + + if (!Config->Incremental) { + HandleError(writeImportLibrary(LibName, Path, Exports, Config->Machine, + Config->MinGW)); + return; + } + + // If the import library already exists, replace it only if the contents + // have changed. + ErrorOr<std::unique_ptr<MemoryBuffer>> OldBuf = MemoryBuffer::getFile( + Path, /*FileSize*/ -1, /*RequiresNullTerminator*/ false); + if (!OldBuf) { + HandleError(writeImportLibrary(LibName, Path, Exports, Config->Machine, + Config->MinGW)); + return; + } + + SmallString<128> TmpName; + if (std::error_code EC = + sys::fs::createUniqueFile(Path + ".tmp-%%%%%%%%.lib", TmpName)) + fatal("cannot create temporary file for import library " + Path + ": " + + EC.message()); + + if (Error E = writeImportLibrary(LibName, TmpName, Exports, Config->Machine, + Config->MinGW)) { + HandleError(std::move(E)); + return; + } + + std::unique_ptr<MemoryBuffer> NewBuf = check(MemoryBuffer::getFile( + TmpName, /*FileSize*/ -1, /*RequiresNullTerminator*/ false)); + if ((*OldBuf)->getBuffer() != NewBuf->getBuffer()) { + OldBuf->reset(); + HandleError(errorCodeToError(sys::fs::rename(TmpName, Path))); + } else { + sys::fs::remove(TmpName); + } } static void parseModuleDefs(StringRef Path) { @@ -569,9 +658,18 @@ static void parseModuleDefs(StringRef Path) { for (COFFShortExport E1 : M.Exports) { Export E2; + // In simple cases, only Name is set. Renamed exports are parsed + // and set as "ExtName = Name". If Name has the form "OtherDll.Func", + // it shouldn't be a normal exported function but a forward to another + // DLL instead. This is supported by both MS and GNU linkers. + if (E1.ExtName != E1.Name && StringRef(E1.Name).contains('.')) { + E2.Name = Saver.save(E1.ExtName); + E2.ForwardTo = Saver.save(E1.Name); + Config->Exports.push_back(E2); + continue; + } E2.Name = Saver.save(E1.Name); - if (E1.isWeak()) - E2.ExtName = Saver.save(E1.ExtName); + E2.ExtName = Saver.save(E1.ExtName); E2.Ordinal = E1.Ordinal; E2.Noname = E1.Noname; E2.Data = E1.Data; @@ -634,8 +732,8 @@ filterBitcodeFiles(StringRef Path, std::vector<std::string> &TemporaryFiles) { log("Creating a temporary archive for " + Path + " to remove bitcode files"); SmallString<128> S; - if (auto EC = sys::fs::createTemporaryFile("lld-" + sys::path::stem(Path), - ".lib", S)) + if (std::error_code EC = sys::fs::createTemporaryFile( + "lld-" + sys::path::stem(Path), ".lib", S)) fatal("cannot create a temporary file: " + EC.message()); std::string Temp = S.str(); TemporaryFiles.push_back(Temp); @@ -711,6 +809,8 @@ void LinkerDriver::enqueueTask(std::function<void()> Task) { } bool LinkerDriver::run() { + ScopedTimer T(InputFileTimer); + bool DidWork = !TaskQueue.empty(); while (!TaskQueue.empty()) { TaskQueue.front()(); @@ -719,6 +819,46 @@ bool LinkerDriver::run() { return DidWork; } +// Parse an /order file. If an option is given, the linker places +// COMDAT sections in the same order as their names appear in the +// given file. +static void parseOrderFile(StringRef Arg) { + // For some reason, the MSVC linker requires a filename to be + // preceded by "@". + if (!Arg.startswith("@")) { + error("malformed /order option: '@' missing"); + return; + } + + // Get a list of all comdat sections for error checking. + DenseSet<StringRef> Set; + for (Chunk *C : Symtab->getChunks()) + if (auto *Sec = dyn_cast<SectionChunk>(C)) + if (Sec->Sym) + Set.insert(Sec->Sym->getName()); + + // Open a file. + StringRef Path = Arg.substr(1); + std::unique_ptr<MemoryBuffer> MB = CHECK( + MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path); + + // Parse a file. An order file contains one symbol per line. + // All symbols that were not present in a given order file are + // considered to have the lowest priority 0 and are placed at + // end of an output section. + for (std::string S : args::getLines(MB->getMemBufferRef())) { + if (Config->Machine == I386 && !isDecorated(S)) + S = "_" + S; + + if (Set.count(S) == 0) { + if (Config->WarnMissingOrderSymbol) + warn("/order:" + Arg + ": missing symbol: " + S + " [LNK4037]"); + } + else + Config->Order[S] = INT_MIN + Config->Order.size(); + } +} + void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // If the first command line argument is "/lib", link.exe acts like lib.exe. // We call our own implementation of lib.exe that understands bitcode files. @@ -734,11 +874,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { InitializeAllTargetMCs(); InitializeAllAsmParsers(); InitializeAllAsmPrinters(); - InitializeAllDisassemblers(); // Parse command line options. ArgParser Parser; - opt::InputArgList Args = Parser.parseLINK(ArgsArr.slice(1)); + opt::InputArgList Args = Parser.parseLINK(ArgsArr); // Parse and evaluate -mllvm options. std::vector<const char *> V; @@ -762,6 +901,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { return; } + if (Args.hasArg(OPT_show_timing)) + Config->ShowTiming = true; + + ScopedTimer T(Timer::root()); // Handle --version, which is an lld extension. This option is a bit odd // because it doesn't start with "/", but we deliberately chose "--" to // avoid conflict with /version and for compatibility with clang-cl. @@ -804,7 +947,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Handle /ignore for (auto *Arg : Args.filtered(OPT_ignore)) { - if (StringRef(Arg->getValue()) == "4217") + if (StringRef(Arg->getValue()) == "4037") + Config->WarnMissingOrderSymbol = false; + else if (StringRef(Arg->getValue()) == "4217") Config->WarnLocallyDefinedImported = false; // Other warning numbers are ignored. } @@ -825,6 +970,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Handle /debug if (Args.hasArg(OPT_debug, OPT_debug_dwarf, OPT_debug_ghash)) { Config->Debug = true; + Config->Incremental = true; if (auto *Arg = Args.getLastArg(OPT_debugtype)) Config->DebugTypes = parseDebugType(Arg->getValue()); else @@ -833,9 +979,17 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Handle /pdb bool ShouldCreatePDB = Args.hasArg(OPT_debug, OPT_debug_ghash); - if (ShouldCreatePDB) + if (ShouldCreatePDB) { if (auto *Arg = Args.getLastArg(OPT_pdb)) Config->PDBPath = Arg->getValue(); + if (auto *Arg = Args.getLastArg(OPT_pdbaltpath)) + Config->PDBAltPath = Arg->getValue(); + if (Args.hasArg(OPT_natvis)) + Config->NatvisFiles = Args.getAllArgValues(OPT_natvis); + + if (auto *Arg = Args.getLastArg(OPT_pdb_source_path)) + Config->PDBSourcePath = Arg->getValue(); + } // Handle /noentry if (Args.hasArg(OPT_noentry)) { @@ -859,6 +1013,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { DynamicBaseArg->getOption().getID() == OPT_dynamicbase_no) Config->DynamicBase = false; + // MSDN claims "/FIXED:NO is the default setting for a DLL, and /FIXED is the + // default setting for any other project type.", but link.exe defaults to + // /FIXED:NO for exe outputs as well. Match behavior, not docs. bool Fixed = Args.hasFlag(OPT_fixed, OPT_fixed_no, false); if (Fixed) { if (DynamicBaseArg && @@ -894,6 +1051,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { if (auto *Arg = Args.getLastArg(OPT_stack)) parseNumbers(Arg->getValue(), &Config->StackReserve, &Config->StackCommit); + // Handle /guard:cf + if (auto *Arg = Args.getLastArg(OPT_guard)) + parseGuard(Arg->getValue()); + // Handle /heap if (auto *Arg = Args.getLastArg(OPT_heap)) parseNumbers(Arg->getValue(), &Config->HeapReserve, &Config->HeapCommit); @@ -908,6 +1069,23 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { parseSubsystem(Arg->getValue(), &Config->Subsystem, &Config->MajorOSVersion, &Config->MinorOSVersion); + // Handle /timestamp + if (llvm::opt::Arg *Arg = Args.getLastArg(OPT_timestamp, OPT_repro)) { + if (Arg->getOption().getID() == OPT_repro) { + Config->Timestamp = 0; + Config->Repro = true; + } else { + Config->Repro = false; + StringRef Value(Arg->getValue()); + if (Value.getAsInteger(0, Config->Timestamp)) + fatal(Twine("invalid timestamp: ") + Value + + ". Expected 32-bit integer"); + } + } else { + Config->Repro = false; + Config->Timestamp = time(nullptr); + } + // Handle /alternatename for (auto *Arg : Args.filtered(OPT_alternatename)) parseAlternateName(Arg->getValue()); @@ -921,8 +1099,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { Config->Implib = Arg->getValue(); // Handle /opt. - bool DoGC = !Args.hasArg(OPT_debug); - unsigned ICFLevel = 1; // 0: off, 1: limited, 2: on + bool DoGC = !Args.hasArg(OPT_debug) || Args.hasArg(OPT_profile); + unsigned ICFLevel = + Args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on + unsigned TailMerge = 1; for (auto *Arg : Args.filtered(OPT_opt)) { std::string Str = StringRef(Arg->getValue()).lower(); SmallVector<StringRef, 1> Vec; @@ -936,14 +1116,18 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { ICFLevel = 2; } else if (S == "noicf") { ICFLevel = 0; + } else if (S == "lldtailmerge") { + TailMerge = 2; + } else if (S == "nolldtailmerge") { + TailMerge = 0; } else if (S.startswith("lldlto=")) { StringRef OptLevel = S.substr(7); - if (OptLevel.getAsInteger(10, Config->LTOOptLevel) || - Config->LTOOptLevel > 3) + if (OptLevel.getAsInteger(10, Config->LTOO) || Config->LTOO > 3) error("/opt:lldlto: invalid optimization level: " + OptLevel); } else if (S.startswith("lldltojobs=")) { StringRef Jobs = S.substr(11); - if (Jobs.getAsInteger(10, Config->LTOJobs) || Config->LTOJobs == 0) + if (Jobs.getAsInteger(10, Config->ThinLTOJobs) || + Config->ThinLTOJobs == 0) error("/opt:lldltojobs: invalid job count: " + Jobs); } else if (S.startswith("lldltopartitions=")) { StringRef N = S.substr(17); @@ -964,11 +1148,16 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { ICFLevel = 0; Config->DoGC = DoGC; Config->DoICF = ICFLevel > 0; + Config->TailMerge = (TailMerge == 1 && Config->DoICF) || TailMerge == 2; // Handle /lldsavetemps if (Args.hasArg(OPT_lldsavetemps)) Config->SaveTemps = true; + // Handle /kill-at + if (Args.hasArg(OPT_kill_at)) + Config->KillAt = true; + // Handle /lldltocache if (auto *Arg = Args.getLastArg(OPT_lldltocache)) Config->LTOCache = Arg->getValue(); @@ -987,6 +1176,14 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { for (auto *Arg : Args.filtered(OPT_merge)) parseMerge(Arg->getValue()); + // Add default section merging rules after user rules. User rules take + // precedence, but we will emit a warning if there is a conflict. + parseMerge(".idata=.rdata"); + parseMerge(".didat=.rdata"); + parseMerge(".edata=.rdata"); + parseMerge(".xdata=.rdata"); + parseMerge(".bss=.data"); + // Handle /section for (auto *Arg : Args.filtered(OPT_section)) parseSection(Arg->getValue()); @@ -1024,39 +1221,77 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { if (!Config->ManifestInput.empty() && Config->Manifest != Configuration::Embed) { - fatal("/MANIFESTINPUT: requires /MANIFEST:EMBED"); + fatal("/manifestinput: requires /manifest:embed"); } // Handle miscellaneous boolean flags. Config->AllowBind = Args.hasFlag(OPT_allowbind, OPT_allowbind_no, true); Config->AllowIsolation = Args.hasFlag(OPT_allowisolation, OPT_allowisolation_no, true); + Config->Incremental = + Args.hasFlag(OPT_incremental, OPT_incremental_no, + !Config->DoGC && !Config->DoICF && !Args.hasArg(OPT_order) && + !Args.hasArg(OPT_profile)); + Config->IntegrityCheck = + Args.hasFlag(OPT_integritycheck, OPT_integritycheck_no, false); Config->NxCompat = Args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true); - Config->TerminalServerAware = Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true); + Config->TerminalServerAware = + !Config->DLL && Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true); Config->DebugDwarf = Args.hasArg(OPT_debug_dwarf); Config->DebugGHashes = Args.hasArg(OPT_debug_ghash); + Config->DebugSymtab = Args.hasArg(OPT_debug_symtab); Config->MapFile = getMapFile(Args); + if (Config->Incremental && Args.hasArg(OPT_profile)) { + warn("ignoring '/incremental' due to '/profile' specification"); + Config->Incremental = false; + } + + if (Config->Incremental && Args.hasArg(OPT_order)) { + warn("ignoring '/incremental' due to '/order' specification"); + Config->Incremental = false; + } + + if (Config->Incremental && Config->DoGC) { + warn("ignoring '/incremental' because REF is enabled; use '/opt:noref' to " + "disable"); + Config->Incremental = false; + } + + if (Config->Incremental && Config->DoICF) { + warn("ignoring '/incremental' because ICF is enabled; use '/opt:noicf' to " + "disable"); + Config->Incremental = false; + } + if (errorCount()) return; - bool WholeArchiveFlag = Args.hasArg(OPT_wholearchive_flag); + std::set<sys::fs::UniqueID> WholeArchives; + for (auto *Arg : Args.filtered(OPT_wholearchive_file)) + if (Optional<StringRef> Path = doFindFile(Arg->getValue())) + if (Optional<sys::fs::UniqueID> ID = getUniqueID(*Path)) + WholeArchives.insert(*ID); + + // A predicate returning true if a given path is an argument for + // /wholearchive:, or /wholearchive is enabled globally. + // This function is a bit tricky because "foo.obj /wholearchive:././foo.obj" + // needs to be handled as "/wholearchive:foo.obj foo.obj". + auto IsWholeArchive = [&](StringRef Path) -> bool { + if (Args.hasArg(OPT_wholearchive_flag)) + return true; + if (Optional<sys::fs::UniqueID> ID = getUniqueID(Path)) + return WholeArchives.count(*ID); + return false; + }; + // Create a list of input files. Files can be given as arguments // for /defaultlib option. - std::vector<MemoryBufferRef> MBs; - for (auto *Arg : Args.filtered(OPT_INPUT, OPT_wholearchive_file)) { - switch (Arg->getOption().getID()) { - case OPT_INPUT: - if (Optional<StringRef> Path = findFile(Arg->getValue())) - enqueuePath(*Path, WholeArchiveFlag); - break; - case OPT_wholearchive_file: - if (Optional<StringRef> Path = findFile(Arg->getValue())) - enqueuePath(*Path, true); - break; - } - } + for (auto *Arg : Args.filtered(OPT_INPUT, OPT_wholearchive_file)) + if (Optional<StringRef> Path = findFile(Arg->getValue())) + enqueuePath(*Path, IsWholeArchive(*Path)); + for (auto *Arg : Args.filtered(OPT_defaultlib)) if (Optional<StringRef> Path = findLib(Arg->getValue())) enqueuePath(*Path, false); @@ -1160,10 +1395,24 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { getOutputPath((*Args.filtered(OPT_INPUT).begin())->getValue()); } - // Put the PDB next to the image if no /pdb flag was passed. - if (ShouldCreatePDB && Config->PDBPath.empty()) { - Config->PDBPath = Config->OutputFile; - sys::path::replace_extension(Config->PDBPath, ".pdb"); + if (ShouldCreatePDB) { + // Put the PDB next to the image if no /pdb flag was passed. + if (Config->PDBPath.empty()) { + Config->PDBPath = Config->OutputFile; + sys::path::replace_extension(Config->PDBPath, ".pdb"); + } + + // The embedded PDB path should be the absolute path to the PDB if no + // /pdbaltpath flag was passed. + if (Config->PDBAltPath.empty()) { + Config->PDBAltPath = Config->PDBPath; + + // It's important to make the path absolute and remove dots. This path + // will eventually be written into the PE header, and certain Microsoft + // tools won't work correctly if these assumptions are not held. + sys::fs::make_absolute(Config->PDBAltPath); + sys::path::remove_dots(Config->PDBAltPath); + } } // Set default image base if /base is not given. @@ -1176,11 +1425,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { Symtab->addAbsolute("___safe_se_handler_count", 0); } - // We do not support /guard:cf (control flow protection) yet. - // Define CFG symbols anyway so that we can link MSVC 2015 CRT. Symtab->addAbsolute(mangle("__guard_fids_count"), 0); Symtab->addAbsolute(mangle("__guard_fids_table"), 0); - Symtab->addAbsolute(mangle("__guard_flags"), 0x100); + Symtab->addAbsolute(mangle("__guard_flags"), 0); Symtab->addAbsolute(mangle("__guard_iat_count"), 0); Symtab->addAbsolute(mangle("__guard_iat_table"), 0); Symtab->addAbsolute(mangle("__guard_longjmp_count"), 0); @@ -1255,7 +1502,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Handle /safeseh. if (Args.hasFlag(OPT_safeseh, OPT_safeseh_no, false)) { for (ObjFile *File : ObjFile::Instances) - if (!File->SEHCompat) + if (!File->hasSafeSEH()) error("/safeseh: " + File->getName() + " is not compatible with SEH"); if (errorCount()) return; @@ -1275,7 +1522,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { E.Name = Def->getName(); E.Sym = Def; if (Def->getChunk() && - !(Def->getChunk()->getPermissions() & IMAGE_SCN_MEM_EXECUTE)) + !(Def->getChunk()->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)) E.Data = true; Config->Exports.push_back(E); }); @@ -1318,6 +1565,12 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { if (Config->Manifest == Configuration::SideBySide) createSideBySideManifest(); + // Handle /order. We want to do this at this moment because we + // need a complete list of comdat sections to warn on nonexistent + // functions. + if (auto *Arg = Args.getLastArg(OPT_order)) + parseOrderFile(Arg->getValue()); + // Identify unreferenced COMDAT sections. if (Config->DoGC) markLive(Symtab->getChunks()); @@ -1328,6 +1581,11 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Write the result. writeResult(); + + // Stop early so we can print the results. + Timer::root().stop(); + if (Config->ShowTiming) + Timer::root().print(); } } // namespace coff diff --git a/COFF/Driver.h b/COFF/Driver.h index 3f7fad1038f3..627e991a9028 100644 --- a/COFF/Driver.h +++ b/COFF/Driver.h @@ -21,6 +21,7 @@ #include "llvm/Object/COFF.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/TarWriter.h" #include <memory> #include <set> @@ -36,12 +37,6 @@ using llvm::COFF::MachineTypes; using llvm::COFF::WindowsSubsystem; using llvm::Optional; -// Implemented in MarkLive.cpp. -void markLive(ArrayRef<Chunk *> Chunks); - -// Implemented in ICF.cpp. -void doICF(ArrayRef<Chunk *> Chunks); - class COFFOptTable : public llvm::opt::OptTable { public: COFFOptTable(); @@ -56,8 +51,10 @@ public: llvm::opt::InputArgList parse(StringRef S) { return parse(tokenize(S)); } // Tokenizes a given string and then parses as command line options in - // .drectve section. - llvm::opt::InputArgList parseDirectives(StringRef S); + // .drectve section. /EXPORT options are returned in second element + // to be processed in fastpath. + std::pair<llvm::opt::InputArgList, std::vector<StringRef>> + parseDirectives(StringRef S); private: // Parses command line options. @@ -98,7 +95,11 @@ private: // Library search path. The first element is always "" (current directory). std::vector<StringRef> SearchPaths; - std::set<std::string> VisitedFiles; + + // We don't want to add the same file more than once. + // Files are uniquified by their filesystem and file number. + std::set<llvm::sys::fs::UniqueID> VisitedFiles; + std::set<std::string> VisitedLibs; Symbol *addUndefined(StringRef Sym); @@ -143,6 +144,8 @@ StringRef machineToStr(MachineTypes MT); // Parses a string in the form of "<integer>[,<integer>]". void parseNumbers(StringRef Arg, uint64_t *Addr, uint64_t *Size = nullptr); +void parseGuard(StringRef Arg); + // Parses a string in the form of "<integer>[.<integer>]". // Minor's default value is 0. void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor); diff --git a/COFF/DriverUtils.cpp b/COFF/DriverUtils.cpp index e0641e04a017..c12e791f9507 100644 --- a/COFF/DriverUtils.cpp +++ b/COFF/DriverUtils.cpp @@ -61,12 +61,7 @@ public: StringRef Exe = Saver.save(*ExeOrErr); Args.insert(Args.begin(), Exe); - std::vector<const char *> Vec; - for (StringRef S : Args) - Vec.push_back(S.data()); - Vec.push_back(nullptr); - - if (sys::ExecuteAndWait(Args[0], Vec.data()) != 0) + if (sys::ExecuteAndWait(Args[0], Args) != 0) fatal("ExecuteAndWait failed: " + llvm::join(Args.begin(), Args.end(), " ")); } @@ -128,6 +123,21 @@ void parseVersion(StringRef Arg, uint32_t *Major, uint32_t *Minor) { fatal("invalid number: " + S2); } +void parseGuard(StringRef FullArg) { + SmallVector<StringRef, 1> SplitArgs; + FullArg.split(SplitArgs, ","); + for (StringRef Arg : SplitArgs) { + if (Arg.equals_lower("no")) + Config->GuardCF = GuardCFLevel::Off; + else if (Arg.equals_lower("nolongjmp")) + Config->GuardCF = GuardCFLevel::NoLongJmp; + else if (Arg.equals_lower("cf") || Arg.equals_lower("longjmp")) + Config->GuardCF = GuardCFLevel::Full; + else + fatal("invalid argument to /guard: " + Arg); + } +} + // Parses a string in the form of "<subsystem>[,<integer>[.<integer>]]". void parseSubsystem(StringRef Arg, WindowsSubsystem *Sys, uint32_t *Major, uint32_t *Minor) { @@ -170,6 +180,10 @@ void parseMerge(StringRef S) { std::tie(From, To) = S.split('='); if (From.empty() || To.empty()) fatal("/merge: invalid argument: " + S); + if (From == ".rsrc" || To == ".rsrc") + fatal("/merge: cannot merge '.rsrc' with any section"); + if (From == ".reloc" || To == ".reloc") + fatal("/merge: cannot merge '.reloc' with any section"); auto Pair = Config->Merge.insert(std::make_pair(From, To)); bool Inserted = Pair.second; if (!Inserted) { @@ -418,15 +432,15 @@ static std::string createManifestXml() { return createManifestXmlWithExternalMt(DefaultXml); } -static std::unique_ptr<MemoryBuffer> +static std::unique_ptr<WritableMemoryBuffer> createMemoryBufferForManifestRes(size_t ManifestSize) { size_t ResSize = alignTo( object::WIN_RES_MAGIC_SIZE + object::WIN_RES_NULL_ENTRY_SIZE + sizeof(object::WinResHeaderPrefix) + sizeof(object::WinResIDs) + sizeof(object::WinResHeaderSuffix) + ManifestSize, object::WIN_RES_DATA_ALIGNMENT); - return MemoryBuffer::getNewMemBuffer(ResSize, - Config->OutputFile + ".manifest.res"); + return WritableMemoryBuffer::getNewMemBuffer(ResSize, Config->OutputFile + + ".manifest.res"); } static void writeResFileHeader(char *&Buf) { @@ -465,16 +479,16 @@ static void writeResEntryHeader(char *&Buf, size_t ManifestSize) { std::unique_ptr<MemoryBuffer> createManifestRes() { std::string Manifest = createManifestXml(); - std::unique_ptr<MemoryBuffer> Res = + std::unique_ptr<WritableMemoryBuffer> Res = createMemoryBufferForManifestRes(Manifest.size()); - char *Buf = const_cast<char *>(Res->getBufferStart()); + char *Buf = Res->getBufferStart(); writeResFileHeader(Buf); writeResEntryHeader(Buf, Manifest.size()); // Copy the manifest data into the .res file. std::copy(Manifest.begin(), Manifest.end(), Buf); - return Res; + return std::move(Res); } void createSideBySideManifest() { @@ -558,9 +572,35 @@ err: static StringRef undecorate(StringRef Sym) { if (Config->Machine != I386) return Sym; + // In MSVC mode, a fully decorated stdcall function is exported + // as-is with the leading underscore (with type IMPORT_NAME). + // In MinGW mode, a decorated stdcall function gets the underscore + // removed, just like normal cdecl functions. + if (Sym.startswith("_") && Sym.contains('@') && !Config->MinGW) + return Sym; return Sym.startswith("_") ? Sym.substr(1) : Sym; } +// Convert stdcall/fastcall style symbols into unsuffixed symbols, +// with or without a leading underscore. (MinGW specific.) +static StringRef killAt(StringRef Sym, bool Prefix) { + if (Sym.empty()) + return Sym; + // Strip any trailing stdcall suffix + Sym = Sym.substr(0, Sym.find('@', 1)); + if (!Sym.startswith("@")) { + if (Prefix && !Sym.startswith("_")) + return Saver.save("_" + Sym); + return Sym; + } + // For fastcall, remove the leading @ and replace it with an + // underscore, if prefixes are used. + Sym = Sym.substr(1); + if (Prefix) + Sym = Saver.save("_" + Sym); + return Sym; +} + // Performs error checking on all /export arguments. // It also sets ordinals. void fixupExports() { @@ -593,6 +633,15 @@ void fixupExports() { } } + if (Config->KillAt && Config->Machine == I386) { + for (Export &E : Config->Exports) { + E.Name = killAt(E.Name, true); + E.ExportName = killAt(E.ExportName, false); + E.ExtName = killAt(E.ExtName, true); + E.SymbolName = killAt(E.SymbolName, true); + } + } + // Uniquefy by name. DenseMap<StringRef, Export *> Map(Config->Exports.size()); std::vector<Export> V; @@ -702,6 +751,28 @@ static const llvm::opt::OptTable::Info InfoTable[] = { COFFOptTable::COFFOptTable() : OptTable(InfoTable, true) {} +// Set color diagnostics according to --color-diagnostics={auto,always,never} +// or --no-color-diagnostics flags. +static void handleColorDiagnostics(opt::InputArgList &Args) { + auto *Arg = Args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq, + OPT_no_color_diagnostics); + if (!Arg) + return; + if (Arg->getOption().getID() == OPT_color_diagnostics) { + errorHandler().ColorDiagnostics = true; + } else if (Arg->getOption().getID() == OPT_no_color_diagnostics) { + errorHandler().ColorDiagnostics = false; + } else { + StringRef S = Arg->getValue(); + if (S == "always") + errorHandler().ColorDiagnostics = true; + else if (S == "never") + errorHandler().ColorDiagnostics = false; + else if (S != "auto") + error("unknown option: --color-diagnostics=" + S); + } +} + static cl::TokenizerCallback getQuotingStyle(opt::InputArgList &Args) { if (auto *Arg = Args.getLastArg(OPT_rsp_quoting)) { StringRef S = Arg->getValue(); @@ -720,50 +791,73 @@ opt::InputArgList ArgParser::parse(ArrayRef<const char *> Argv) { // Make InputArgList from string vectors. unsigned MissingIndex; unsigned MissingCount; - SmallVector<const char *, 256> Vec(Argv.data(), Argv.data() + Argv.size()); // We need to get the quoting style for response files before parsing all // options so we parse here before and ignore all the options but // --rsp-quoting. - opt::InputArgList Args = Table.ParseArgs(Vec, MissingIndex, MissingCount); + opt::InputArgList Args = Table.ParseArgs(Argv, MissingIndex, MissingCount); // Expand response files (arguments in the form of @<filename>) // and then parse the argument again. - cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), Vec); - Args = Table.ParseArgs(Vec, MissingIndex, MissingCount); + SmallVector<const char *, 256> ExpandedArgv(Argv.data(), Argv.data() + Argv.size()); + cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), ExpandedArgv); + Args = Table.ParseArgs(makeArrayRef(ExpandedArgv).drop_front(), MissingIndex, + MissingCount); // Print the real command line if response files are expanded. - if (Args.hasArg(OPT_verbose) && Argv.size() != Vec.size()) { + if (Args.hasArg(OPT_verbose) && Argv.size() != ExpandedArgv.size()) { std::string Msg = "Command line:"; - for (const char *S : Vec) + for (const char *S : ExpandedArgv) Msg += " " + std::string(S); message(Msg); } + // Save the command line after response file expansion so we can write it to + // the PDB if necessary. + Config->Argv = {ExpandedArgv.begin(), ExpandedArgv.end()}; + // Handle /WX early since it converts missing argument warnings to errors. errorHandler().FatalWarnings = Args.hasFlag(OPT_WX, OPT_WX_no, false); if (MissingCount) fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); + + handleColorDiagnostics(Args); + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) warn("ignoring unknown argument: " + Arg->getSpelling()); + + if (Args.hasArg(OPT_lib)) + warn("ignoring /lib since it's not the first argument"); + return Args; } // Tokenizes and parses a given string as command line in .drective section. -opt::InputArgList ArgParser::parseDirectives(StringRef S) { - // Make InputArgList from string vectors. +// /EXPORT options are processed in fastpath. +std::pair<opt::InputArgList, std::vector<StringRef>> +ArgParser::parseDirectives(StringRef S) { + std::vector<StringRef> Exports; + SmallVector<const char *, 16> Rest; + + for (StringRef Tok : tokenize(S)) { + if (Tok.startswith_lower("/export:") || Tok.startswith_lower("-export:")) + Exports.push_back(Tok.substr(strlen("/export:"))); + else + Rest.push_back(Tok.data()); + } + + // Make InputArgList from unparsed string vectors. unsigned MissingIndex; unsigned MissingCount; - opt::InputArgList Args = - Table.ParseArgs(tokenize(S), MissingIndex, MissingCount); + opt::InputArgList Args = Table.ParseArgs(Rest, MissingIndex, MissingCount); if (MissingCount) fatal(Twine(Args.getArgString(MissingIndex)) + ": missing argument"); for (auto *Arg : Args.filtered(OPT_UNKNOWN)) warn("ignoring unknown argument: " + Arg->getSpelling()); - return Args; + return {std::move(Args), std::move(Exports)}; } // link.exe has an interesting feature. If LINK or _LINK_ environment @@ -773,11 +867,11 @@ opt::InputArgList ArgParser::parseLINK(std::vector<const char *> Argv) { // Concatenate LINK env and command line arguments, and then parse them. if (Optional<std::string> S = Process::GetEnv("LINK")) { std::vector<const char *> V = tokenize(*S); - Argv.insert(Argv.begin(), V.begin(), V.end()); + Argv.insert(std::next(Argv.begin()), V.begin(), V.end()); } if (Optional<std::string> S = Process::GetEnv("_LINK_")) { std::vector<const char *> V = tokenize(*S); - Argv.insert(Argv.begin(), V.begin(), V.end()); + Argv.insert(std::next(Argv.begin()), V.begin(), V.end()); } return parse(Argv); } diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp index 48895c34886c..629720901ab8 100644 --- a/COFF/ICF.cpp +++ b/COFF/ICF.cpp @@ -18,9 +18,11 @@ // //===----------------------------------------------------------------------===// +#include "ICF.h" #include "Chunks.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Timer.h" #include "llvm/ADT/Hashing.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Parallel.h" @@ -34,6 +36,8 @@ using namespace llvm; namespace lld { namespace coff { +static Timer ICFTimer("ICF", Timer::root()); + class ICF { public: void run(ArrayRef<Chunk *> V); @@ -41,6 +45,8 @@ public: private: void segregate(size_t Begin, size_t End, bool Constant); + bool assocEquals(const SectionChunk *A, const SectionChunk *B); + bool equalsConstant(const SectionChunk *A, const SectionChunk *B); bool equalsVariable(const SectionChunk *A, const SectionChunk *B); @@ -61,8 +67,8 @@ private: // Returns a hash value for S. uint32_t ICF::getHash(SectionChunk *C) { - return hash_combine(C->getPermissions(), C->SectionName, C->NumRelocs, - C->Alignment, uint32_t(C->Header->SizeOfRawData), + return hash_combine(C->getOutputCharacteristics(), C->SectionName, + C->Relocs.size(), uint32_t(C->Header->SizeOfRawData), C->Checksum, C->getContents()); } @@ -73,21 +79,27 @@ uint32_t ICF::getHash(SectionChunk *C) { // 2017) says that /opt:icf folds both functions and read-only data. // Despite that, the MSVC linker folds only functions. We found // a few instances of programs that are not safe for data merging. -// Therefore, we merge only functions just like the MSVC tool. However, we merge -// identical .xdata sections, because the address of unwind information is -// insignificant to the user program and the Visual C++ linker does this. +// Therefore, we merge only functions just like the MSVC tool. However, we also +// merge read-only sections in a couple of cases where the address of the +// section is insignificant to the user program and the behaviour matches that +// of the Visual C++ linker. bool ICF::isEligible(SectionChunk *C) { // Non-comdat chunks, dead chunks, and writable chunks are not elegible. - bool Writable = C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_WRITE; + bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; if (!C->isCOMDAT() || !C->isLive() || Writable) return false; // Code sections are eligible. - if (C->getPermissions() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) + if (C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) + return true; + + // .pdata and .xdata unwind info sections are eligible. + StringRef OutSecName = C->getSectionName().split('$').first; + if (OutSecName == ".pdata" || OutSecName == ".xdata") return true; - // .xdata unwind info sections are eligble. - return C->getSectionName().split('$').first == ".xdata"; + // So are vtables. + return C->Sym && C->Sym->getName().startswith("??_7"); } // Split an equivalence class into smaller classes. @@ -116,10 +128,23 @@ void ICF::segregate(size_t Begin, size_t End, bool Constant) { } } +// Returns true if two sections' associative children are equal. +bool ICF::assocEquals(const SectionChunk *A, const SectionChunk *B) { + auto ChildClasses = [&](const SectionChunk *SC) { + std::vector<uint32_t> Classes; + for (const SectionChunk *C : SC->children()) + if (!C->SectionName.startswith(".debug") && + C->SectionName != ".gfids$y" && C->SectionName != ".gljmp$y") + Classes.push_back(C->Class[Cnt % 2]); + return Classes; + }; + return ChildClasses(A) == ChildClasses(B); +} + // Compare "non-moving" part of two sections, namely everything // except relocation targets. bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { - if (A->NumRelocs != B->NumRelocs) + if (A->Relocs.size() != B->Relocs.size()) return false; // Compare relocations. @@ -142,10 +167,11 @@ bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) { return false; // Compare section attributes and contents. - return A->getPermissions() == B->getPermissions() && - A->SectionName == B->SectionName && A->Alignment == B->Alignment && + return A->getOutputCharacteristics() == B->getOutputCharacteristics() && + A->SectionName == B->SectionName && A->Header->SizeOfRawData == B->Header->SizeOfRawData && - A->Checksum == B->Checksum && A->getContents() == B->getContents(); + A->Checksum == B->Checksum && A->getContents() == B->getContents() && + assocEquals(A, B); } // Compare "moving" part of two sections, namely relocation targets. @@ -161,9 +187,12 @@ bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) { return D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2]; return false; }; - return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq); + return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), + Eq) && + assocEquals(A, B); } +// Find the first Chunk after Begin that has a different class from Begin. size_t ICF::findBoundary(size_t Begin, size_t End) { for (size_t I = Begin + 1; I < End; ++I) if (Chunks[Begin]->Class[Cnt % 2] != Chunks[I]->Class[Cnt % 2]) @@ -173,11 +202,8 @@ size_t ICF::findBoundary(size_t Begin, size_t End) { void ICF::forEachClassRange(size_t Begin, size_t End, std::function<void(size_t, size_t)> Fn) { - if (Begin > 0) - Begin = findBoundary(Begin - 1, End); - while (Begin < End) { - size_t Mid = findBoundary(Begin, Chunks.size()); + size_t Mid = findBoundary(Begin, End); Fn(Begin, Mid); Begin = Mid; } @@ -193,12 +219,22 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) { return; } - // Split sections into 256 shards and call Fn in parallel. - size_t NumShards = 256; + // Shard into non-overlapping intervals, and call Fn in parallel. + // The sharding must be completed before any calls to Fn are made + // so that Fn can modify the Chunks in its shard without causing data + // races. + const size_t NumShards = 256; size_t Step = Chunks.size() / NumShards; - for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) { - size_t End = (I == NumShards - 1) ? Chunks.size() : (I + 1) * Step; - forEachClassRange(I * Step, End, Fn); + size_t Boundaries[NumShards + 1]; + Boundaries[0] = 0; + Boundaries[NumShards] = Chunks.size(); + for_each_n(parallel::par, size_t(1), NumShards, [&](size_t I) { + Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size()); + }); + for_each_n(parallel::par, size_t(1), NumShards + 1, [&](size_t I) { + if (Boundaries[I - 1] < Boundaries[I]) { + forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn); + } }); ++Cnt; } @@ -207,6 +243,8 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) { // Two sections are considered the same if their section headers, // contents and relocations are all the same. void ICF::run(ArrayRef<Chunk *> Vec) { + ScopedTimer T(ICFTimer); + // Collect only mergeable sections and group by hash value. uint32_t NextId = 1; for (Chunk *C : Vec) { @@ -218,6 +256,12 @@ void ICF::run(ArrayRef<Chunk *> Vec) { } } + // Make sure that ICF doesn't merge sections that are being handled by string + // tail merging. + for (auto &P : MergeChunk::Instances) + for (SectionChunk *SC : P.second->Sections) + SC->Class[0] = NextId++; + // Initially, we use hash values to partition sections. for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) { // Set MSB to 1 to avoid collisions with non-hash classs. diff --git a/COFF/Strings.h b/COFF/ICF.h index 67fc1c773c66..9c54e0c9ec2d 100644 --- a/COFF/Strings.h +++ b/COFF/ICF.h @@ -1,4 +1,4 @@ -//===- Strings.h ------------------------------------------------*- C++ -*-===// +//===- ICF.h --------------------------------------------------------------===// // // The LLVM Linker // @@ -7,17 +7,20 @@ // //===----------------------------------------------------------------------===// -#ifndef LLD_COFF_STRINGS_H -#define LLD_COFF_STRINGS_H +#ifndef LLD_COFF_ICF_H +#define LLD_COFF_ICF_H -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include <string> +#include "lld/Common/LLVM.h" +#include "llvm/ADT/ArrayRef.h" namespace lld { namespace coff { -llvm::Optional<std::string> demangleMSVC(llvm::StringRef S); -} -} + +class Chunk; + +void doICF(ArrayRef<Chunk *> Chunks); + +} // namespace coff +} // namespace lld #endif diff --git a/COFF/InputFiles.cpp b/COFF/InputFiles.cpp index a8f52e0391f7..2b3e65fae04b 100644 --- a/COFF/InputFiles.cpp +++ b/COFF/InputFiles.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Target/TargetOptions.h" #include <cstring> #include <system_error> @@ -138,12 +139,13 @@ void ObjFile::initializeChunks() { if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) SparseChunks[I] = PendingComdat; else - SparseChunks[I] = readSection(I, nullptr); + SparseChunks[I] = readSection(I, nullptr, ""); } } SectionChunk *ObjFile::readSection(uint32_t SectionNumber, - const coff_aux_section_definition *Def) { + const coff_aux_section_definition *Def, + StringRef LeaderName) { const coff_section *Sec; StringRef Name; if (auto EC = COFFObj->getSection(SectionNumber, Sec)) @@ -151,15 +153,7 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, if (auto EC = COFFObj->getSectionName(Sec, Name)) fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + EC.message()); - if (Name == ".sxdata") { - ArrayRef<uint8_t> Data; - COFFObj->getSectionContents(Sec, Data); - if (Data.size() % 4 != 0) - fatal(".sxdata must be an array of symbol table indices"); - SXData = {reinterpret_cast<const ulittle32_t *>(Data.data()), - Data.size() / 4}; - return nullptr; - } + if (Name == ".drectve") { ArrayRef<uint8_t> Data; COFFObj->getSectionContents(Sec, Data); @@ -177,8 +171,8 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, // CodeView needs a linker support. We need to interpret and debug // info, and then write it to a separate .pdb file. - // Ignore debug info unless /debug is given. - if (!Config->Debug && Name.startswith(".debug")) + // Ignore DWARF debug info unless /debug is given. + if (!Config->Debug && Name.startswith(".debug_")) return nullptr; if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) @@ -191,6 +185,18 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, // linked in the regular manner. if (C->isCodeView()) DebugChunks.push_back(C); + else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gfids$y") + GuardFidChunks.push_back(C); + else if (Config->GuardCF != GuardCFLevel::Off && Name == ".gljmp$y") + GuardLJmpChunks.push_back(C); + else if (Name == ".sxdata") + SXDataChunks.push_back(C); + else if (Config->TailMerge && Sec->NumberOfRelocations == 0 && + Name == ".rdata" && LeaderName.startswith("??_C@")) + // COFF sections that look like string literal sections (i.e. no + // relocations, in .rdata, leader symbol name matches the MSVC name mangling + // for string literals) are subject to string tail merging. + MergeChunk::addSection(C); else Chunks.push_back(C); @@ -211,7 +217,7 @@ void ObjFile::readAssociativeDefinition( // the section; otherwise mark it as discarded. int32_t SectionNumber = Sym.getSectionNumber(); if (Parent) { - SparseChunks[SectionNumber] = readSection(SectionNumber, Def); + SparseChunks[SectionNumber] = readSection(SectionNumber, Def, ""); if (SparseChunks[SectionNumber]) Parent->addAssociative(SparseChunks[SectionNumber]); } else { @@ -275,6 +281,13 @@ void ObjFile::initializeSymbols() { if (auto *Def = Sym.getSectionDefinition()) if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) readAssociativeDefinition(Sym, Def); + if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) { + StringRef Name; + COFFObj->getSymbolName(Sym, Name); + log("comdat section " + Name + + " without leader and unassociated, discarding"); + continue; + } Symbols[I] = createRegular(Sym); } @@ -294,43 +307,46 @@ Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { Optional<Symbol *> ObjFile::createDefined( COFFSymbolRef Sym, std::vector<const coff_aux_section_definition *> &ComdatDefs) { - StringRef Name; + auto GetName = [&]() { + StringRef S; + COFFObj->getSymbolName(Sym, S); + return S; + }; + if (Sym.isCommon()) { auto *C = make<CommonChunk>(Sym); Chunks.push_back(C); - COFFObj->getSymbolName(Sym, Name); - Symbol *S = - Symtab->addCommon(this, Name, Sym.getValue(), Sym.getGeneric(), C); - return S; + return Symtab->addCommon(this, GetName(), Sym.getValue(), Sym.getGeneric(), + C); } + if (Sym.isAbsolute()) { - COFFObj->getSymbolName(Sym, Name); + StringRef Name = GetName(); + // Skip special symbols. if (Name == "@comp.id") return nullptr; - // COFF spec 5.10.1. The .sxdata section. if (Name == "@feat.00") { - if (Sym.getValue() & 1) - SEHCompat = true; + Feat00Flags = Sym.getValue(); return nullptr; } + if (Sym.isExternal()) return Symtab->addAbsolute(Name, Sym); - else - return make<DefinedAbsolute>(Name, Sym); + return make<DefinedAbsolute>(Name, Sym); } + int32_t SectionNumber = Sym.getSectionNumber(); if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG) return nullptr; - // Reserved sections numbers don't have contents. if (llvm::COFF::isReservedSectionNumber(SectionNumber)) - fatal("broken object file: " + toString(this)); + fatal(toString(this) + ": " + GetName() + + " should not refer to special section " + Twine(SectionNumber)); - // This symbol references a section which is not present in the section - // header. if ((uint32_t)SectionNumber >= SparseChunks.size()) - fatal("broken object file: " + toString(this)); + fatal(toString(this) + ": " + GetName() + + " should not refer to non-existent section " + Twine(SectionNumber)); // Handle comdat leader symbols. if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { @@ -338,16 +354,16 @@ Optional<Symbol *> ObjFile::createDefined( Symbol *Leader; bool Prevailing; if (Sym.isExternal()) { - COFFObj->getSymbolName(Sym, Name); std::tie(Leader, Prevailing) = - Symtab->addComdat(this, Name, Sym.getGeneric()); + Symtab->addComdat(this, GetName(), Sym.getGeneric()); } else { Leader = make<DefinedRegular>(this, /*Name*/ "", false, /*IsExternal*/ false, Sym.getGeneric()); Prevailing = true; } + if (Prevailing) { - SectionChunk *C = readSection(SectionNumber, Def); + SectionChunk *C = readSection(SectionNumber, Def, GetName()); SparseChunks[SectionNumber] = C; C->Sym = cast<DefinedRegular>(Leader); cast<DefinedRegular>(Leader)->Data = &C->Repl; @@ -429,7 +445,8 @@ void ImportFile::parse() { // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) if (Hdr->getType() == llvm::COFF::IMPORT_CODE) - ThunkSym = Symtab->addImportThunk(Name, ImpSym, Hdr->Machine); + ThunkSym = Symtab->addImportThunk( + Name, cast_or_null<DefinedImportData>(ImpSym), Hdr->Machine); } void BitcodeFile::parse() { @@ -462,7 +479,7 @@ void BitcodeFile::parse() { } else { Sym = Symtab->addRegular(this, SymName); } - SymbolBodies.push_back(Sym); + Symbols.push_back(Sym); } Directives = Obj->getCOFFLinkerOpts(); } @@ -486,10 +503,7 @@ MachineTypes BitcodeFile::getMachineType() { // Returns the last element of a path, which is supposed to be a filename. static StringRef getBasename(StringRef Path) { - size_t Pos = Path.find_last_of("\\/"); - if (Pos == StringRef::npos) - return Path; - return Path.substr(Pos + 1); + return sys::path::filename(Path, sys::path::Style::windows); } // Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". diff --git a/COFF/InputFiles.h b/COFF/InputFiles.h index adedbc2ad7a8..4ee4b363886f 100644 --- a/COFF/InputFiles.h +++ b/COFF/InputFiles.h @@ -110,6 +110,9 @@ public: MachineTypes getMachineType() override; ArrayRef<Chunk *> getChunks() { return Chunks; } ArrayRef<SectionChunk *> getDebugChunks() { return DebugChunks; } + ArrayRef<SectionChunk *> getSXDataChunks() { return SXDataChunks; } + ArrayRef<SectionChunk *> getGuardFidChunks() { return GuardFidChunks; } + ArrayRef<SectionChunk *> getGuardLJmpChunks() { return GuardLJmpChunks; } ArrayRef<Symbol *> getSymbols() { return Symbols; } // Returns a Symbol object for the SymbolIndex'th symbol in the @@ -123,13 +126,17 @@ public: static std::vector<ObjFile *> Instances; - // True if this object file is compatible with SEH. - // COFF-specific and x86-only. - bool SEHCompat = false; + // Flags in the absolute @feat.00 symbol if it is present. These usually + // indicate if an object was compiled with certain security features enabled + // like stack guard, safeseh, /guard:cf, or other things. + uint32_t Feat00Flags = 0; - // The symbol table indexes of the safe exception handlers. - // COFF-specific and x86-only. - ArrayRef<llvm::support::ulittle32_t> SXData; + // True if this object file is compatible with SEH. COFF-specific and + // x86-only. COFF spec 5.10.1. The .sxdata section. + bool hasSafeSEH() { return Feat00Flags & 0x1; } + + // True if this file was compiled with /guard:cf. + bool hasGuardCF() { return Feat00Flags & 0x800; } // Pointer to the PDB module descriptor builder. Various debug info records // will reference object files by "module index", which is here. Things like @@ -143,7 +150,8 @@ private: SectionChunk * readSection(uint32_t SectionNumber, - const llvm::object::coff_aux_section_definition *Def); + const llvm::object::coff_aux_section_definition *Def, + StringRef LeaderName); void readAssociativeDefinition( COFFSymbolRef COFFSym, @@ -165,6 +173,15 @@ private: // CodeView debug info sections. std::vector<SectionChunk *> DebugChunks; + // Chunks containing symbol table indices of exception handlers. Only used for + // 32-bit x86. + std::vector<SectionChunk *> SXDataChunks; + + // Chunks containing symbol table indices of address taken symbols and longjmp + // targets. These are not linked into the final binary when /guard:cf is set. + std::vector<SectionChunk *> GuardFidChunks; + std::vector<SectionChunk *> GuardLJmpChunks; + // This vector contains the same chunks as Chunks, but they are // indexed such that you can get a SectionChunk by section index. // Nonexistent section indices are filled with null pointers. @@ -184,15 +201,14 @@ private: // for details about the format. class ImportFile : public InputFile { public: - explicit ImportFile(MemoryBufferRef M) - : InputFile(ImportKind, M), Live(!Config->DoGC) {} + explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M) {} static bool classof(const InputFile *F) { return F->kind() == ImportKind; } static std::vector<ImportFile *> Instances; - DefinedImportData *ImpSym = nullptr; - DefinedImportThunk *ThunkSym = nullptr; + Symbol *ImpSym = nullptr; + Symbol *ThunkSym = nullptr; std::string DLLName; private: @@ -204,12 +220,15 @@ public: Chunk *Location = nullptr; // We want to eliminate dllimported symbols if no one actually refers them. - // This "Live" bit is used to keep track of which import library members + // These "Live" bits are used to keep track of which import library members // are actually in use. // // If the Live bit is turned off by MarkLive, Writer will ignore dllimported - // symbols provided by this import library member. - bool Live; + // symbols provided by this import library member. We also track whether the + // imported symbol is used separately from whether the thunk is used in order + // to avoid creating unnecessary thunks. + bool Live = !Config->DoGC; + bool ThunkLive = !Config->DoGC; }; // Used for LTO. @@ -217,7 +236,7 @@ class BitcodeFile : public InputFile { public: explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } - ArrayRef<Symbol *> getSymbols() { return SymbolBodies; } + ArrayRef<Symbol *> getSymbols() { return Symbols; } MachineTypes getMachineType() override; static std::vector<BitcodeFile *> Instances; std::unique_ptr<llvm::lto::InputFile> Obj; @@ -225,7 +244,7 @@ public: private: void parse() override; - std::vector<Symbol *> SymbolBodies; + std::vector<Symbol *> Symbols; }; } // namespace coff diff --git a/COFF/LTO.cpp b/COFF/LTO.cpp index fa2a54b61841..93f7ba3f9e4c 100644 --- a/COFF/LTO.cpp +++ b/COFF/LTO.cpp @@ -12,6 +12,7 @@ #include "InputFiles.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -40,47 +41,32 @@ using namespace llvm::object; using namespace lld; using namespace lld::coff; -static void diagnosticHandler(const DiagnosticInfo &DI) { - SmallString<128> ErrStorage; - raw_svector_ostream OS(ErrStorage); - DiagnosticPrinterRawOStream DP(OS); - DI.print(DP); - warn(ErrStorage); -} - -static void checkError(Error E) { - handleAllErrors(std::move(E), - [&](ErrorInfoBase &EIB) { error(EIB.message()); }); -} +static std::unique_ptr<lto::LTO> createLTO() { + lto::Config C; + C.Options = InitTargetOptionsFromCodeGenFlags(); -static void saveBuffer(StringRef Buffer, const Twine &Path) { - std::error_code EC; - raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::F_None); - if (EC) - error("cannot create " + Path + ": " + EC.message()); - OS << Buffer; -} + // Always emit a section per function/datum with LTO. LLVM LTO should get most + // of the benefit of linker GC, but there are still opportunities for ICF. + C.Options.FunctionSections = true; + C.Options.DataSections = true; -static std::unique_ptr<lto::LTO> createLTO() { - lto::Config Conf; - Conf.Options = InitTargetOptionsFromCodeGenFlags(); // Use static reloc model on 32-bit x86 because it usually results in more // compact code, and because there are also known code generation bugs when // using the PIC model (see PR34306). if (Config->Machine == COFF::IMAGE_FILE_MACHINE_I386) - Conf.RelocModel = Reloc::Static; + C.RelocModel = Reloc::Static; else - Conf.RelocModel = Reloc::PIC_; - Conf.DisableVerify = true; - Conf.DiagHandler = diagnosticHandler; - Conf.OptLevel = Config->LTOOptLevel; + C.RelocModel = Reloc::PIC_; + C.DisableVerify = true; + C.DiagHandler = diagnosticHandler; + C.OptLevel = Config->LTOO; if (Config->SaveTemps) - checkError(Conf.addSaveTemps(std::string(Config->OutputFile) + ".", - /*UseInputModulePath*/ true)); + checkError(C.addSaveTemps(std::string(Config->OutputFile) + ".", + /*UseInputModulePath*/ true)); lto::ThinBackend Backend; - if (Config->LTOJobs != 0) - Backend = lto::createInProcessThinBackend(Config->LTOJobs); - return llvm::make_unique<lto::LTO>(std::move(Conf), Backend, + if (Config->ThinLTOJobs != 0) + Backend = lto::createInProcessThinBackend(Config->ThinLTOJobs); + return llvm::make_unique<lto::LTO>(std::move(C), Backend, Config->LTOPartitions); } @@ -119,7 +105,7 @@ void BitcodeCompiler::add(BitcodeFile &F) { // and return the resulting objects. std::vector<StringRef> BitcodeCompiler::compile() { unsigned MaxTasks = LTOObj->getMaxTasks(); - Buff.resize(MaxTasks); + Buf.resize(MaxTasks); Files.resize(MaxTasks); // The /lldltocache option specifies the path to a directory in which to cache @@ -127,15 +113,15 @@ std::vector<StringRef> BitcodeCompiler::compile() { // specified, configure LTO to use it as the cache directory. lto::NativeObjectCache Cache; if (!Config->LTOCache.empty()) - Cache = check( - lto::localCache(Config->LTOCache, - [&](size_t Task, std::unique_ptr<MemoryBuffer> MB, - StringRef Path) { Files[Task] = std::move(MB); })); + Cache = check(lto::localCache( + Config->LTOCache, [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) { + Files[Task] = std::move(MB); + })); checkError(LTOObj->run( [&](size_t Task) { return llvm::make_unique<lto::NativeObjectStream>( - llvm::make_unique<raw_svector_ostream>(Buff[Task])); + llvm::make_unique<raw_svector_ostream>(Buf[Task])); }, Cache)); @@ -144,15 +130,15 @@ std::vector<StringRef> BitcodeCompiler::compile() { std::vector<StringRef> Ret; for (unsigned I = 0; I != MaxTasks; ++I) { - if (Buff[I].empty()) + if (Buf[I].empty()) continue; if (Config->SaveTemps) { if (I == 0) - saveBuffer(Buff[I], Config->OutputFile + ".lto.obj"); + saveBuffer(Buf[I], Config->OutputFile + ".lto.obj"); else - saveBuffer(Buff[I], Config->OutputFile + Twine(I) + ".lto.obj"); + saveBuffer(Buf[I], Config->OutputFile + Twine(I) + ".lto.obj"); } - Ret.emplace_back(Buff[I].data(), Buff[I].size()); + Ret.emplace_back(Buf[I].data(), Buf[I].size()); } for (std::unique_ptr<MemoryBuffer> &File : Files) diff --git a/COFF/LTO.h b/COFF/LTO.h index a444aa7ac4fe..f00924654780 100644 --- a/COFF/LTO.h +++ b/COFF/LTO.h @@ -48,7 +48,7 @@ public: private: std::unique_ptr<llvm::lto::LTO> LTOObj; - std::vector<SmallString<0>> Buff; + std::vector<SmallString<0>> Buf; std::vector<std::unique_ptr<MemoryBuffer>> Files; }; } diff --git a/COFF/MapFile.cpp b/COFF/MapFile.cpp index 717ed3419ea5..6ca1b6647bd7 100644 --- a/COFF/MapFile.cpp +++ b/COFF/MapFile.cpp @@ -23,7 +23,6 @@ #include "SymbolTable.h" #include "Symbols.h" #include "Writer.h" - #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" @@ -37,14 +36,15 @@ using namespace lld::coff; typedef DenseMap<const SectionChunk *, SmallVector<DefinedRegular *, 4>> SymbolMapTy; +static const std::string Indent8 = " "; // 8 spaces +static const std::string Indent16 = " "; // 16 spaces + // Print out the first three columns of a line. static void writeHeader(raw_ostream &OS, uint64_t Addr, uint64_t Size, uint64_t Align) { OS << format("%08llx %08llx %5lld ", Addr, Size, Align); } -static std::string indent(int Depth) { return std::string(Depth * 8, ' '); } - // Returns a list of all symbols that we want to print out. static std::vector<DefinedRegular *> getSymbols() { std::vector<DefinedRegular *> V; @@ -79,7 +79,7 @@ getSymbolStrings(ArrayRef<DefinedRegular *> Syms) { for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) { raw_string_ostream OS(Str[I]); writeHeader(OS, Syms[I]->getRVA(), 0, 0); - OS << indent(2) << toString(*Syms[I]); + OS << Indent16 << toString(*Syms[I]); }); DenseMap<DefinedRegular *, std::string> Ret; @@ -108,7 +108,7 @@ void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) { // Print out file contents. for (OutputSection *Sec : OutputSections) { writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize); - OS << Sec->getName() << '\n'; + OS << Sec->Name << '\n'; for (Chunk *C : Sec->getChunks()) { auto *SC = dyn_cast<SectionChunk>(C); @@ -116,7 +116,7 @@ void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) { continue; writeHeader(OS, SC->getRVA(), SC->getSize(), SC->Alignment); - OS << indent(1) << SC->File->getName() << ":(" << SC->getSectionName() + OS << Indent8 << SC->File->getName() << ":(" << SC->getSectionName() << ")\n"; for (DefinedRegular *Sym : SectionSyms[SC]) OS << SymStr[Sym] << '\n'; diff --git a/COFF/MarkLive.cpp b/COFF/MarkLive.cpp index 01be60d12d82..57ae450a9138 100644 --- a/COFF/MarkLive.cpp +++ b/COFF/MarkLive.cpp @@ -9,16 +9,21 @@ #include "Chunks.h" #include "Symbols.h" +#include "lld/Common/Timer.h" #include "llvm/ADT/STLExtras.h" #include <vector> namespace lld { namespace coff { +static Timer GCTimer("GC", Timer::root()); + // Set live bit on for each reachable chunk. Unmarked (unreachable) // COMDAT chunks will be ignored by Writer, so they will be excluded // from the final output. void markLive(ArrayRef<Chunk *> Chunks) { + ScopedTimer T(GCTimer); + // We build up a worklist of sections which have been marked as live. We only // push into the worklist when we discover an unmarked section, and we mark // as we push, so sections never appear twice in the list. @@ -43,7 +48,7 @@ void markLive(ArrayRef<Chunk *> Chunks) { else if (auto *Sym = dyn_cast<DefinedImportData>(B)) Sym->File->Live = true; else if (auto *Sym = dyn_cast<DefinedImportThunk>(B)) - Sym->WrappedSym->File->Live = true; + Sym->WrappedSym->File->Live = Sym->WrappedSym->File->ThunkLive = true; }; // Add GC root chunks. diff --git a/COFF/MarkLive.h b/COFF/MarkLive.h new file mode 100644 index 000000000000..5b652dd48196 --- /dev/null +++ b/COFF/MarkLive.h @@ -0,0 +1,24 @@ +//===- MarkLive.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_MARKLIVE_H +#define LLD_COFF_MARKLIVE_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/ArrayRef.h" + +namespace lld { +namespace coff { + +void markLive(ArrayRef<Chunk *> Chunks); + +} // namespace coff +} // namespace lld + +#endif // LLD_COFF_MARKLIVE_H diff --git a/COFF/MinGW.cpp b/COFF/MinGW.cpp index b7a47165640d..2ca00587331f 100644 --- a/COFF/MinGW.cpp +++ b/COFF/MinGW.cpp @@ -138,7 +138,7 @@ void coff::writeDefFile(StringRef Name) { << "@" << E.Ordinal; if (auto *Def = dyn_cast_or_null<Defined>(E.Sym)) { if (Def && Def->getChunk() && - !(Def->getChunk()->getPermissions() & IMAGE_SCN_MEM_EXECUTE)) + !(Def->getChunk()->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)) OS << " DATA"; } OS << "\n"; diff --git a/COFF/Options.td b/COFF/Options.td index 7d4cdba14f75..871bad8bd655 100644 --- a/COFF/Options.td +++ b/COFF/Options.td @@ -20,6 +20,10 @@ def align : P<"align", "Section alignment">; def aligncomm : P<"aligncomm", "Set common symbol alignment">; def alternatename : P<"alternatename", "Define weak alias">; def base : P<"base", "Base address of the program">; +def color_diagnostics: Flag<["--"], "color-diagnostics">, + HelpText<"Use colors in diagnostics">; +def color_diagnostics_eq: Joined<["--"], "color-diagnostics=">, + HelpText<"Use colors in diagnostics; one of 'always', 'never', 'auto'">; def defaultlib : P<"defaultlib", "Add the library to the list of input files">; def delayload : P<"delayload", "Delay loaded DLL name">; def entry : P<"entry", "Name of entry point symbol">; @@ -28,9 +32,12 @@ def errorlimit : P<"errorlimit", def export : P<"export", "Export a function">; // No help text because /failifmismatch is not intended to be used by the user. def failifmismatch : P<"failifmismatch", "">; +def guard : P<"guard", "Control flow guard">; def heap : P<"heap", "Size of the heap">; def ignore : P<"ignore", "Specify warning codes to ignore">; def implib : P<"implib", "Import library name">; +def lib : F<"lib">, + HelpText<"Act like lib.exe; must be first argument if present">; def libpath : P<"libpath", "Additional library search path">; def linkrepro : P<"linkrepro", "Dump linker invocation and input files for debugging">; def lldltocache : P<"lldltocache", "Path to ThinLTO cached object file directory">; @@ -42,12 +49,18 @@ def merge : P<"merge", "Combine sections">; def mllvm : P<"mllvm", "Options to pass to LLVM">; def nodefaultlib : P<"nodefaultlib", "Remove a default library">; def opt : P<"opt", "Control optimizations">; +def order : P<"order", "Put functions in order">; def out : P<"out", "Path to file to write output">; +def natvis : P<"natvis", "Path to natvis file to embed in the PDB">; +def no_color_diagnostics: F<"no-color-diagnostics">, + HelpText<"Do not use colors in diagnostics">; def pdb : P<"pdb", "PDB file path">; +def pdbaltpath : P<"pdbaltpath", "PDB file path to embed in the image">; def section : P<"section", "Specify section attributes">; def stack : P<"stack", "Size of the stack">; def stub : P<"stub", "Specify DOS stub file">; def subsystem : P<"subsystem", "Specify subsystem">; +def timestamp : P<"timestamp", "Specify the PE header timestamp">; def version : P<"version", "Specify a version number in the PE header">; def wholearchive_file : P<"wholearchive", "Include all object files from this archive">; @@ -72,12 +85,14 @@ def deffile : Joined<["/", "-"], "def:">, HelpText<"Use module-definition file">; def debug : F<"debug">, HelpText<"Embed a symbol table in the image">; +def debug_full : F<"debug:full">, Alias<debug>; def debugtype : P<"debugtype", "Debug Info Options">; def dll : F<"dll">, HelpText<"Create a DLL">; def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">; def nodefaultlib_all : F<"nodefaultlib">; def noentry : F<"noentry">; def profile : F<"profile">; +def repro : F<"Brepro">, HelpText<"Use a hash of the executable as the PE header timestamp">; def swaprun_cd : F<"swaprun:cd">; def swaprun_net : F<"swaprun:net">; def verbose : F<"verbose">; @@ -102,6 +117,12 @@ defm fixed : B<"fixed", "Disable base relocations", defm highentropyva : B<"highentropyva", "Enable 64-bit ASLR (default on 64-bit)", "Disable 64-bit ASLR">; +defm incremental : B<"incremental", + "Keep original import library if contents are unchanged", + "Overwrite import library even if contents are unchanged">; +defm integritycheck : B<"integritycheck", + "Set FORCE_INTEGRITY bit in PE header", + "No effect (default)">; defm largeaddressaware : B<"largeaddressaware", "Enable large addresses (default on 64-bit)", "Disable large addresses (default on 32-bit)">; @@ -120,10 +141,14 @@ def help_q : Flag<["/?", "-?"], "">, Alias<help>; // LLD extensions def debug_ghash : F<"debug:ghash">; def debug_dwarf : F<"debug:dwarf">; +def debug_symtab : F<"debug:symtab">; def export_all_symbols : F<"export-all-symbols">; +def kill_at : F<"kill-at">; def lldmingw : F<"lldmingw">; def msvclto : F<"msvclto">; def output_def : Joined<["/", "-"], "output-def:">; +def pdb_source_path : P<"pdbsourcepath", + "Base path used to make relative source file path absolute in PDB">; def rsp_quoting : Joined<["--"], "rsp-quoting=">, HelpText<"Quoting style for response files, 'windows' (default) or 'posix'">; def dash_dash_version : Flag<["--"], "version">, @@ -132,6 +157,7 @@ def dash_dash_version : Flag<["--"], "version">, // Flags for debugging def lldmap : F<"lldmap">; def lldmap_file : Joined<["/", "-"], "lldmap:">; +def show_timing : F<"time">; //============================================================================== // The flags below do nothing. They are defined only for link.exe compatibility. @@ -146,8 +172,6 @@ multiclass QB<string name> { def functionpadmin : F<"functionpadmin">; def ignoreidl : F<"ignoreidl">; -def incremental : F<"incremental">; -def no_incremental : F<"incremental:no">; def nologo : F<"nologo">; def throwingnew : F<"throwingnew">; def editandcontinue : F<"editandcontinue">; @@ -157,8 +181,6 @@ def delay : QF<"delay">; def errorreport : QF<"errorreport">; def idlout : QF<"idlout">; def maxilksize : QF<"maxilksize">; -def natvis : QF<"natvis">; -def pdbaltpath : QF<"pdbaltpath">; def tlbid : QF<"tlbid">; def tlbout : QF<"tlbout">; def verbose_all : QF<"verbose">; diff --git a/COFF/PDB.cpp b/COFF/PDB.cpp index 91a9a01db569..766bf3f6b456 100644 --- a/COFF/PDB.cpp +++ b/COFF/PDB.cpp @@ -15,7 +15,7 @@ #include "Symbols.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" -#include "llvm/DebugInfo/CodeView/CVDebugRecord.h" +#include "lld/Common/Timer.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" @@ -45,8 +45,10 @@ #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/DebugInfo/PDB/PDB.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/CVDebugRecord.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JamCRC.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" @@ -61,6 +63,15 @@ using llvm::object::coff_section; static ExitOnError ExitOnErr; +static Timer TotalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root()); + +static Timer AddObjectsTimer("Add Objects", TotalPdbLinkTimer); +static Timer TypeMergingTimer("Type Merging", AddObjectsTimer); +static Timer SymbolMergingTimer("Symbol Merging", AddObjectsTimer); +static Timer GlobalsLayoutTimer("Globals Stream Layout", TotalPdbLinkTimer); +static Timer TpiStreamLayoutTimer("TPI Stream Layout", TotalPdbLinkTimer); +static Timer DiskCommitTimer("Commit to Disk", TotalPdbLinkTimer); + namespace { /// Map from type index and item index in a type server PDB to the /// corresponding index in the destination PDB. @@ -74,11 +85,19 @@ class PDBLinker { public: PDBLinker(SymbolTable *Symtab) : Alloc(), Symtab(Symtab), Builder(Alloc), TypeTable(Alloc), - IDTable(Alloc), GlobalTypeTable(Alloc), GlobalIDTable(Alloc) {} + IDTable(Alloc), GlobalTypeTable(Alloc), GlobalIDTable(Alloc) { + // This isn't strictly necessary, but link.exe usually puts an empty string + // as the first "valid" string in the string table, so we do the same in + // order to maintain as much byte-for-byte compatibility as possible. + PDBStrTab.insert(""); + } /// Emit the basic PDB structure: initial streams, headers, etc. void initialize(const llvm::codeview::DebugInfo &BuildId); + /// Add natvis files specified on the command line. + void addNatvisFiles(); + /// Link CodeView from each object file in the symbol table into the PDB. void addObjectsToPDB(); @@ -96,18 +115,16 @@ public: /// If the object does not use a type server PDB (compiled with /Z7), we merge /// all the type and item records from the .debug$S stream and fill in the /// caller-provided ObjectIndexMap. - const CVIndexMap &mergeDebugT(ObjFile *File, CVIndexMap &ObjectIndexMap); + Expected<const CVIndexMap&> mergeDebugT(ObjFile *File, + CVIndexMap &ObjectIndexMap); - const CVIndexMap &maybeMergeTypeServerPDB(ObjFile *File, - TypeServer2Record &TS); + Expected<const CVIndexMap&> maybeMergeTypeServerPDB(ObjFile *File, + TypeServer2Record &TS); /// Add the section map and section contributions to the PDB. void addSections(ArrayRef<OutputSection *> OutputSections, ArrayRef<uint8_t> SectionTable); - void addSectionContrib(pdb::DbiModuleDescriptorBuilder &LinkerModule, - OutputSection *OS, Chunk *C); - /// Write the PDB to disk. void commit(); @@ -136,10 +153,19 @@ private: llvm::SmallString<128> NativePath; + /// A list of other PDBs which are loaded during the linking process and which + /// we need to keep around since the linking operation may reference pointers + /// inside of these PDBs. + llvm::SmallVector<std::unique_ptr<pdb::NativeSession>, 2> LoadedPDBs; + std::vector<pdb::SecMapEntry> SectionMap; /// Type index mappings of type server PDBs that we've loaded so far. std::map<GUID, CVIndexMap> TypeServerIndexMappings; + + /// List of TypeServer PDBs which cannot be loaded. + /// Cached to prevent repeated load attempts. + std::set<GUID> MissingTypeServerPDBs; }; } @@ -179,8 +205,8 @@ static bool canUseDebugH(ArrayRef<uint8_t> DebugH) { DebugH = DebugH.drop_front(sizeof(object::debug_h_header)); return Header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC && Header->Version == 0 && - Header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1) && - (DebugH.size() % 20 == 0); + Header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::SHA1_8) && + (DebugH.size() % 8 == 0); } static Optional<ArrayRef<uint8_t>> getDebugH(ObjFile *File) { @@ -230,8 +256,10 @@ maybeReadTypeServerRecord(CVTypeArray &Types) { return std::move(TS); } -const CVIndexMap &PDBLinker::mergeDebugT(ObjFile *File, - CVIndexMap &ObjectIndexMap) { +Expected<const CVIndexMap&> PDBLinker::mergeDebugT(ObjFile *File, + CVIndexMap &ObjectIndexMap) { + ScopedTimer T(TypeMergingTimer); + ArrayRef<uint8_t> Data = getDebugSection(File, ".debug$T"); if (Data.empty()) return ObjectIndexMap; @@ -304,11 +332,19 @@ tryToLoadPDB(const GUID &GuidFromObj, StringRef TSPath) { return std::move(NS); } -const CVIndexMap &PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, - TypeServer2Record &TS) { - // First, check if we already loaded a PDB with this GUID. Return the type +Expected<const CVIndexMap&> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, + TypeServer2Record &TS) { + const GUID& TSId = TS.getGuid(); + StringRef TSPath = TS.getName(); + + // First, check if the PDB has previously failed to load. + if (MissingTypeServerPDBs.count(TSId)) + return make_error<pdb::GenericError>( + pdb::generic_error_code::type_server_not_found, TSPath); + + // Second, check if we already loaded a PDB with this GUID. Return the type // index mapping if we have it. - auto Insertion = TypeServerIndexMappings.insert({TS.getGuid(), CVIndexMap()}); + auto Insertion = TypeServerIndexMappings.insert({TSId, CVIndexMap()}); CVIndexMap &IndexMap = Insertion.first->second; if (!Insertion.second) return IndexMap; @@ -319,23 +355,32 @@ const CVIndexMap &PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, // Check for a PDB at: // 1. The given file path // 2. Next to the object file or archive file - auto ExpectedSession = tryToLoadPDB(TS.getGuid(), TS.getName()); + auto ExpectedSession = tryToLoadPDB(TSId, TSPath); if (!ExpectedSession) { consumeError(ExpectedSession.takeError()); StringRef LocalPath = !File->ParentName.empty() ? File->ParentName : File->getName(); SmallString<128> Path = sys::path::parent_path(LocalPath); sys::path::append( - Path, sys::path::filename(TS.getName(), sys::path::Style::windows)); - ExpectedSession = tryToLoadPDB(TS.getGuid(), Path); + Path, sys::path::filename(TSPath, sys::path::Style::windows)); + ExpectedSession = tryToLoadPDB(TSId, Path); + } + if (auto E = ExpectedSession.takeError()) { + TypeServerIndexMappings.erase(TSId); + MissingTypeServerPDBs.emplace(TSId); + return std::move(E); } - if (auto E = ExpectedSession.takeError()) - fatal("Type server PDB was not found: " + toString(std::move(E))); - auto ExpectedTpi = (*ExpectedSession)->getPDBFile().getPDBTpiStream(); + pdb::NativeSession *Session = ExpectedSession->get(); + + // Keep a strong reference to this PDB, so that it's safe to hold pointers + // into the file. + LoadedPDBs.push_back(std::move(*ExpectedSession)); + + auto ExpectedTpi = Session->getPDBFile().getPDBTpiStream(); if (auto E = ExpectedTpi.takeError()) fatal("Type server does not have TPI stream: " + toString(std::move(E))); - auto ExpectedIpi = (*ExpectedSession)->getPDBFile().getPDBIpiStream(); + auto ExpectedIpi = Session->getPDBFile().getPDBIpiStream(); if (auto E = ExpectedIpi.takeError()) fatal("Type server does not have TPI stream: " + toString(std::move(E))); @@ -412,6 +457,38 @@ static void remapTypesInSymbolRecord(ObjFile *File, SymbolKind SymKind, } } +static void +recordStringTableReferenceAtOffset(MutableArrayRef<uint8_t> Contents, + uint32_t Offset, + std::vector<ulittle32_t *> &StrTableRefs) { + Contents = + Contents.drop_front(Offset).take_front(sizeof(support::ulittle32_t)); + ulittle32_t *Index = reinterpret_cast<ulittle32_t *>(Contents.data()); + StrTableRefs.push_back(Index); +} + +static void +recordStringTableReferences(SymbolKind Kind, MutableArrayRef<uint8_t> Contents, + std::vector<ulittle32_t *> &StrTableRefs) { + // For now we only handle S_FILESTATIC, but we may need the same logic for + // S_DEFRANGE and S_DEFRANGE_SUBFIELD. However, I cannot seem to generate any + // PDBs that contain these types of records, so because of the uncertainty + // they are omitted here until we can prove that it's necessary. + switch (Kind) { + case SymbolKind::S_FILESTATIC: + // FileStaticSym::ModFileOffset + recordStringTableReferenceAtOffset(Contents, 4, StrTableRefs); + break; + case SymbolKind::S_DEFRANGE: + case SymbolKind::S_DEFRANGE_SUBFIELD: + log("Not fixing up string table reference in S_DEFRANGE / " + "S_DEFRANGE_SUBFIELD record"); + break; + default: + break; + } +} + static SymbolKind symbolKind(ArrayRef<uint8_t> RecordData) { const RecordPrefix *Prefix = reinterpret_cast<const RecordPrefix *>(RecordData.data()); @@ -628,53 +705,65 @@ static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File, pdb::GSIStreamBuilder &GsiBuilder, const CVIndexMap &IndexMap, TypeCollection &IDTable, + std::vector<ulittle32_t *> &StringTableRefs, BinaryStreamRef SymData) { // FIXME: Improve error recovery by warning and skipping records when // possible. - CVSymbolArray Syms; - BinaryStreamReader Reader(SymData); - ExitOnErr(Reader.readArray(Syms, Reader.getLength())); + ArrayRef<uint8_t> SymsBuffer; + cantFail(SymData.readBytes(0, SymData.getLength(), SymsBuffer)); SmallVector<SymbolScope, 4> Scopes; - for (CVSymbol Sym : Syms) { - // Discover type index references in the record. Skip it if we don't know - // where they are. - SmallVector<TiReference, 32> TypeRefs; - if (!discoverTypeIndicesInSymbol(Sym, TypeRefs)) { - log("ignoring unknown symbol record with kind 0x" + utohexstr(Sym.kind())); - continue; - } - - // Copy the symbol record so we can mutate it. - MutableArrayRef<uint8_t> NewData = copySymbolForPdb(Sym, Alloc); - - // Re-map all the type index references. - MutableArrayRef<uint8_t> Contents = - NewData.drop_front(sizeof(RecordPrefix)); - remapTypesInSymbolRecord(File, Sym.kind(), Contents, IndexMap, TypeRefs); - // An object file may have S_xxx_ID symbols, but these get converted to - // "real" symbols in a PDB. - translateIdSymbols(NewData, IDTable); - - SymbolKind NewKind = symbolKind(NewData); - - // Fill in "Parent" and "End" fields by maintaining a stack of scopes. - CVSymbol NewSym(NewKind, NewData); - if (symbolOpensScope(NewKind)) - scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), NewSym); - else if (symbolEndsScope(NewKind)) - scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File); - - // Add the symbol to the globals stream if necessary. Do this before adding - // the symbol to the module since we may need to get the next symbol offset, - // and writing to the module's symbol stream will update that offset. - if (symbolGoesInGlobalsStream(NewSym)) - addGlobalSymbol(GsiBuilder, *File, NewSym); + auto EC = forEachCodeViewRecord<CVSymbol>( + SymsBuffer, [&](const CVSymbol &Sym) -> llvm::Error { + // Discover type index references in the record. Skip it if we don't + // know where they are. + SmallVector<TiReference, 32> TypeRefs; + if (!discoverTypeIndicesInSymbol(Sym, TypeRefs)) { + log("ignoring unknown symbol record with kind 0x" + + utohexstr(Sym.kind())); + return Error::success(); + } - // Add the symbol to the module. - if (symbolGoesInModuleStream(NewSym)) - File->ModuleDBI->addSymbol(NewSym); - } + // Copy the symbol record so we can mutate it. + MutableArrayRef<uint8_t> NewData = copySymbolForPdb(Sym, Alloc); + + // Re-map all the type index references. + MutableArrayRef<uint8_t> Contents = + NewData.drop_front(sizeof(RecordPrefix)); + remapTypesInSymbolRecord(File, Sym.kind(), Contents, IndexMap, + TypeRefs); + + // An object file may have S_xxx_ID symbols, but these get converted to + // "real" symbols in a PDB. + translateIdSymbols(NewData, IDTable); + + // If this record refers to an offset in the object file's string table, + // add that item to the global PDB string table and re-write the index. + recordStringTableReferences(Sym.kind(), Contents, StringTableRefs); + + SymbolKind NewKind = symbolKind(NewData); + + // Fill in "Parent" and "End" fields by maintaining a stack of scopes. + CVSymbol NewSym(NewKind, NewData); + if (symbolOpensScope(NewKind)) + scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), + NewSym); + else if (symbolEndsScope(NewKind)) + scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File); + + // Add the symbol to the globals stream if necessary. Do this before + // adding the symbol to the module since we may need to get the next + // symbol offset, and writing to the module's symbol stream will update + // that offset. + if (symbolGoesInGlobalsStream(NewSym)) + addGlobalSymbol(GsiBuilder, *File, NewSym); + + // Add the symbol to the module. + if (symbolGoesInModuleStream(NewSym)) + File->ModuleDBI->addSymbol(NewSym); + return Error::success(); + }); + cantFail(std::move(EC)); } // Allocate memory for a .debug$S section and relocate it. @@ -688,6 +777,32 @@ static ArrayRef<uint8_t> relocateDebugChunk(BumpPtrAllocator &Alloc, ".debug$S"); } +static pdb::SectionContrib createSectionContrib(const Chunk *C, uint32_t Modi) { + OutputSection *OS = C->getOutputSection(); + pdb::SectionContrib SC; + memset(&SC, 0, sizeof(SC)); + SC.ISect = OS->SectionIndex; + SC.Off = C->getRVA() - OS->getRVA(); + SC.Size = C->getSize(); + if (auto *SecChunk = dyn_cast<SectionChunk>(C)) { + SC.Characteristics = SecChunk->Header->Characteristics; + SC.Imod = SecChunk->File->ModuleDBI->getModuleIndex(); + ArrayRef<uint8_t> Contents = SecChunk->getContents(); + JamCRC CRC(0); + ArrayRef<char> CharContents = makeArrayRef( + reinterpret_cast<const char *>(Contents.data()), Contents.size()); + CRC.update(CharContents); + SC.DataCrc = CRC.getCRC(); + } else { + SC.Characteristics = OS->Header.Characteristics; + // FIXME: When we start creating DBI for import libraries, use those here. + SC.Imod = Modi; + } + SC.RelocCrc = 0; // FIXME + + return SC; +} + void PDBLinker::addObjFile(ObjFile *File) { // Add a module descriptor for every object file. We need to put an absolute // path to the object into the PDB. If this is a plain object, we make its @@ -702,14 +817,39 @@ void PDBLinker::addObjFile(ObjFile *File) { File->ModuleDBI = &ExitOnErr(Builder.getDbiBuilder().addModuleInfo(Name)); File->ModuleDBI->setObjFileName(Path); + auto Chunks = File->getChunks(); + uint32_t Modi = File->ModuleDBI->getModuleIndex(); + for (Chunk *C : Chunks) { + auto *SecChunk = dyn_cast<SectionChunk>(C); + if (!SecChunk || !SecChunk->isLive()) + continue; + pdb::SectionContrib SC = createSectionContrib(SecChunk, Modi); + File->ModuleDBI->setFirstSectionContrib(SC); + break; + } + // Before we can process symbol substreams from .debug$S, we need to process // type information, file checksums, and the string table. Add type info to // the PDB first, so that we can get the map from object file type and item // indices to PDB type and item indices. CVIndexMap ObjectIndexMap; - const CVIndexMap &IndexMap = mergeDebugT(File, ObjectIndexMap); + auto IndexMapResult = mergeDebugT(File, ObjectIndexMap); + + // If the .debug$T sections fail to merge, assume there is no debug info. + if (!IndexMapResult) { + warn("Type server PDB for " + Name + " is invalid, ignoring debug info. " + + toString(IndexMapResult.takeError())); + return; + } + + const CVIndexMap &IndexMap = *IndexMapResult; + + ScopedTimer T(SymbolMergingTimer); // Now do all live .debug$S sections. + DebugStringTableSubsectionRef CVStrTab; + DebugChecksumsSubsectionRef Checksums; + std::vector<ulittle32_t *> StringTableReferences; for (SectionChunk *DebugChunk : File->getDebugChunks()) { if (!DebugChunk->isLive() || DebugChunk->getSectionName() != ".debug$S") continue; @@ -723,14 +863,17 @@ void PDBLinker::addObjFile(ObjFile *File) { BinaryStreamReader Reader(RelocatedDebugContents, support::little); ExitOnErr(Reader.readArray(Subsections, RelocatedDebugContents.size())); - DebugStringTableSubsectionRef CVStrTab; - DebugChecksumsSubsectionRef Checksums; for (const DebugSubsectionRecord &SS : Subsections) { switch (SS.kind()) { - case DebugSubsectionKind::StringTable: + case DebugSubsectionKind::StringTable: { + assert(!CVStrTab.valid() && + "Encountered multiple string table subsections!"); ExitOnErr(CVStrTab.initialize(SS.getRecordData())); break; + } case DebugSubsectionKind::FileChecksums: + assert(!Checksums.valid() && + "Encountered multiple checksum subsections!"); ExitOnErr(Checksums.initialize(SS.getRecordData())); break; case DebugSubsectionKind::Lines: @@ -741,10 +884,12 @@ void PDBLinker::addObjFile(ObjFile *File) { case DebugSubsectionKind::Symbols: if (Config->DebugGHashes) { mergeSymbolRecords(Alloc, File, Builder.getGsiBuilder(), IndexMap, - GlobalIDTable, SS.getRecordData()); + GlobalIDTable, StringTableReferences, + SS.getRecordData()); } else { mergeSymbolRecords(Alloc, File, Builder.getGsiBuilder(), IndexMap, - IDTable, SS.getRecordData()); + IDTable, StringTableReferences, + SS.getRecordData()); } break; default: @@ -752,25 +897,55 @@ void PDBLinker::addObjFile(ObjFile *File) { break; } } + } + + // We should have seen all debug subsections across the entire object file now + // which means that if a StringTable subsection and Checksums subsection were + // present, now is the time to handle them. + if (!CVStrTab.valid()) { + if (Checksums.valid()) + fatal(".debug$S sections with a checksums subsection must also contain a " + "string table subsection"); + + if (!StringTableReferences.empty()) + warn("No StringTable subsection was encountered, but there are string " + "table references"); + return; + } + + // Rewrite each string table reference based on the value that the string + // assumes in the final PDB. + for (ulittle32_t *Ref : StringTableReferences) { + auto ExpectedString = CVStrTab.getString(*Ref); + if (!ExpectedString) { + warn("Invalid string table reference"); + consumeError(ExpectedString.takeError()); + continue; + } - if (Checksums.valid()) { - // Make a new file checksum table that refers to offsets in the PDB-wide - // string table. Generally the string table subsection appears after the - // checksum table, so we have to do this after looping over all the - // subsections. - if (!CVStrTab.valid()) - fatal(".debug$S sections must have both a string table subsection " - "and a checksum subsection table or neither"); - auto NewChecksums = make_unique<DebugChecksumsSubsection>(PDBStrTab); - for (FileChecksumEntry &FC : Checksums) { - StringRef FileName = ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); - ExitOnErr(Builder.getDbiBuilder().addModuleSourceFile(*File->ModuleDBI, - FileName)); - NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum); - } - File->ModuleDBI->addDebugSubsection(std::move(NewChecksums)); + *Ref = PDBStrTab.insert(*ExpectedString); + } + + // Make a new file checksum table that refers to offsets in the PDB-wide + // string table. Generally the string table subsection appears after the + // checksum table, so we have to do this after looping over all the + // subsections. + auto NewChecksums = make_unique<DebugChecksumsSubsection>(PDBStrTab); + for (FileChecksumEntry &FC : Checksums) { + SmallString<128> FileName = ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); + if (!sys::path::is_absolute(FileName) && + !Config->PDBSourcePath.empty()) { + SmallString<128> AbsoluteFileName = Config->PDBSourcePath; + sys::path::append(AbsoluteFileName, FileName); + sys::path::native(AbsoluteFileName); + sys::path::remove_dots(AbsoluteFileName, /*remove_dot_dots=*/true); + FileName = std::move(AbsoluteFileName); } + ExitOnErr(Builder.getDbiBuilder().addModuleSourceFile(*File->ModuleDBI, + FileName)); + NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum); } + File->ModuleDBI->addDebugSubsection(std::move(NewChecksums)); } static PublicSym32 createPublic(Defined *Def) { @@ -793,12 +968,15 @@ static PublicSym32 createPublic(Defined *Def) { // Add all object files to the PDB. Merge .debug$T sections into IpiData and // TpiData. void PDBLinker::addObjectsToPDB() { + ScopedTimer T1(AddObjectsTimer); for (ObjFile *File : ObjFile::Instances) addObjFile(File); Builder.getStringTableBuilder().setStrings(PDBStrTab); + T1.stop(); // Construct TPI and IPI stream contents. + ScopedTimer T2(TpiStreamLayoutTimer); if (Config->DebugGHashes) { addTypeInfo(Builder.getTpiBuilder(), GlobalTypeTable); addTypeInfo(Builder.getIpiBuilder(), GlobalIDTable); @@ -806,7 +984,9 @@ void PDBLinker::addObjectsToPDB() { addTypeInfo(Builder.getTpiBuilder(), TypeTable); addTypeInfo(Builder.getIpiBuilder(), IDTable); } + T2.stop(); + ScopedTimer T3(GlobalsLayoutTimer); // Compute the public and global symbols. auto &GsiBuilder = Builder.getGsiBuilder(); std::vector<PublicSym32> Publics; @@ -828,6 +1008,35 @@ void PDBLinker::addObjectsToPDB() { } } +void PDBLinker::addNatvisFiles() { + for (StringRef File : Config->NatvisFiles) { + ErrorOr<std::unique_ptr<MemoryBuffer>> DataOrErr = + MemoryBuffer::getFile(File); + if (!DataOrErr) { + warn("Cannot open input file: " + File); + continue; + } + Builder.addInjectedSource(File, std::move(*DataOrErr)); + } +} + +static codeview::CPUType toCodeViewMachine(COFF::MachineTypes Machine) { + switch (Machine) { + case COFF::IMAGE_FILE_MACHINE_AMD64: + return codeview::CPUType::X64; + case COFF::IMAGE_FILE_MACHINE_ARM: + return codeview::CPUType::ARM7; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return codeview::CPUType::ARM64; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return codeview::CPUType::ARMNT; + case COFF::IMAGE_FILE_MACHINE_I386: + return codeview::CPUType::Intel80386; + default: + llvm_unreachable("Unsupported CPU Type"); + } +} + static void addCommonLinkerModuleSymbols(StringRef Path, pdb::DbiModuleDescriptorBuilder &Mod, BumpPtrAllocator &Allocator) { @@ -838,7 +1047,7 @@ static void addCommonLinkerModuleSymbols(StringRef Path, ONS.Name = "* Linker *"; ONS.Signature = 0; - CS.Machine = Config->is64() ? CPUType::X64 : CPUType::Intel80386; + CS.Machine = toCodeViewMachine(Config->Machine); // Interestingly, if we set the string to 0.0.0.0, then when trying to view // local variables WinDbg emits an error that private symbols are not present. // By setting this to a valid MSVC linker version string, local variables are @@ -889,9 +1098,9 @@ static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &Mod, BumpPtrAllocator &Allocator) { SectionSym Sym(SymbolRecordKind::SectionSym); Sym.Alignment = 12; // 2^12 = 4KB - Sym.Characteristics = OS.getCharacteristics(); + Sym.Characteristics = OS.Header.Characteristics; Sym.Length = OS.getVirtualSize(); - Sym.Name = OS.getName(); + Sym.Name = OS.Name; Sym.Rva = OS.getRVA(); Sym.SectionNumber = OS.SectionIndex; Mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol( @@ -903,10 +1112,15 @@ void coff::createPDB(SymbolTable *Symtab, ArrayRef<OutputSection *> OutputSections, ArrayRef<uint8_t> SectionTable, const llvm::codeview::DebugInfo &BuildId) { + ScopedTimer T1(TotalPdbLinkTimer); PDBLinker PDB(Symtab); + PDB.initialize(BuildId); PDB.addObjectsToPDB(); PDB.addSections(OutputSections, SectionTable); + PDB.addNatvisFiles(); + + ScopedTimer T2(DiskCommitTimer); PDB.commit(); } @@ -920,44 +1134,22 @@ void PDBLinker::initialize(const llvm::codeview::DebugInfo &BuildId) { // Add an Info stream. auto &InfoBuilder = Builder.getInfoBuilder(); - InfoBuilder.setAge(BuildId.PDB70.Age); - GUID uuid; memcpy(&uuid, &BuildId.PDB70.Signature, sizeof(uuid)); + InfoBuilder.setAge(BuildId.PDB70.Age); InfoBuilder.setGuid(uuid); - InfoBuilder.setSignature(time(nullptr)); InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); // Add an empty DBI stream. pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); DbiBuilder.setAge(BuildId.PDB70.Age); DbiBuilder.setVersionHeader(pdb::PdbDbiV70); - ExitOnErr(DbiBuilder.addDbgStream(pdb::DbgHeaderType::NewFPO, {})); -} - -void PDBLinker::addSectionContrib(pdb::DbiModuleDescriptorBuilder &LinkerModule, - OutputSection *OS, Chunk *C) { - pdb::SectionContrib SC; - memset(&SC, 0, sizeof(SC)); - SC.ISect = OS->SectionIndex; - SC.Off = C->getRVA() - OS->getRVA(); - SC.Size = C->getSize(); - if (auto *SecChunk = dyn_cast<SectionChunk>(C)) { - SC.Characteristics = SecChunk->Header->Characteristics; - SC.Imod = SecChunk->File->ModuleDBI->getModuleIndex(); - ArrayRef<uint8_t> Contents = SecChunk->getContents(); - JamCRC CRC(0); - ArrayRef<char> CharContents = makeArrayRef( - reinterpret_cast<const char *>(Contents.data()), Contents.size()); - CRC.update(CharContents); - SC.DataCrc = CRC.getCRC(); - } else { - SC.Characteristics = OS->getCharacteristics(); - // FIXME: When we start creating DBI for import libraries, use those here. - SC.Imod = LinkerModule.getModuleIndex(); - } - SC.RelocCrc = 0; // FIXME - Builder.getDbiBuilder().addSectionContrib(SC); + DbiBuilder.setMachineType(Config->Machine); + // Technically we are not link.exe 14.11, but there are known cases where + // debugging tools on Windows expect Microsoft-specific version numbers or + // they fail to work at all. Since we know we produce PDBs that are + // compatible with LINK 14.11, we set that version number here. + DbiBuilder.setBuildNumber(14, 11); } void PDBLinker::addSections(ArrayRef<OutputSection *> OutputSections, @@ -975,8 +1167,11 @@ void PDBLinker::addSections(ArrayRef<OutputSection *> OutputSections, // Add section contributions. They must be ordered by ascending RVA. for (OutputSection *OS : OutputSections) { addLinkerModuleSectionSymbol(LinkerModule, *OS, Alloc); - for (Chunk *C : OS->getChunks()) - addSectionContrib(LinkerModule, OS, C); + for (Chunk *C : OS->getChunks()) { + pdb::SectionContrib SC = + createSectionContrib(C, LinkerModule.getModuleIndex()); + Builder.getDbiBuilder().addSectionContrib(SC); + } } // Add Section Map stream. @@ -995,3 +1190,145 @@ void PDBLinker::commit() { // Write to a file. ExitOnErr(Builder.commit(Config->PDBPath)); } + +static Expected<StringRef> +getFileName(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) { + auto Iter = Checksums.getArray().at(FileID); + if (Iter == Checksums.getArray().end()) + return make_error<CodeViewError>(cv_error_code::no_records); + uint32_t Offset = Iter->FileNameOffset; + return Strings.getString(Offset); +} + +static uint32_t getSecrelReloc() { + switch (Config->Machine) { + case AMD64: + return COFF::IMAGE_REL_AMD64_SECREL; + case I386: + return COFF::IMAGE_REL_I386_SECREL; + case ARMNT: + return COFF::IMAGE_REL_ARM_SECREL; + case ARM64: + return COFF::IMAGE_REL_ARM64_SECREL; + default: + llvm_unreachable("unknown machine type"); + } +} + +// Try to find a line table for the given offset Addr into the given chunk C. +// If a line table was found, the line table, the string and checksum tables +// that are used to interpret the line table, and the offset of Addr in the line +// table are stored in the output arguments. Returns whether a line table was +// found. +static bool findLineTable(const SectionChunk *C, uint32_t Addr, + DebugStringTableSubsectionRef &CVStrTab, + DebugChecksumsSubsectionRef &Checksums, + DebugLinesSubsectionRef &Lines, + uint32_t &OffsetInLinetable) { + ExitOnError ExitOnErr; + uint32_t SecrelReloc = getSecrelReloc(); + + for (SectionChunk *DbgC : C->File->getDebugChunks()) { + if (DbgC->getSectionName() != ".debug$S") + continue; + + // Build a mapping of SECREL relocations in DbgC that refer to C. + DenseMap<uint32_t, uint32_t> Secrels; + for (const coff_relocation &R : DbgC->Relocs) { + if (R.Type != SecrelReloc) + continue; + + if (auto *S = dyn_cast_or_null<DefinedRegular>( + C->File->getSymbols()[R.SymbolTableIndex])) + if (S->getChunk() == C) + Secrels[R.VirtualAddress] = S->getValue(); + } + + ArrayRef<uint8_t> Contents = + consumeDebugMagic(DbgC->getContents(), ".debug$S"); + DebugSubsectionArray Subsections; + BinaryStreamReader Reader(Contents, support::little); + ExitOnErr(Reader.readArray(Subsections, Contents.size())); + + for (const DebugSubsectionRecord &SS : Subsections) { + switch (SS.kind()) { + case DebugSubsectionKind::StringTable: { + assert(!CVStrTab.valid() && + "Encountered multiple string table subsections!"); + ExitOnErr(CVStrTab.initialize(SS.getRecordData())); + break; + } + case DebugSubsectionKind::FileChecksums: + assert(!Checksums.valid() && + "Encountered multiple checksum subsections!"); + ExitOnErr(Checksums.initialize(SS.getRecordData())); + break; + case DebugSubsectionKind::Lines: { + ArrayRef<uint8_t> Bytes; + auto Ref = SS.getRecordData(); + ExitOnErr(Ref.readLongestContiguousChunk(0, Bytes)); + size_t OffsetInDbgC = Bytes.data() - DbgC->getContents().data(); + + // Check whether this line table refers to C. + auto I = Secrels.find(OffsetInDbgC); + if (I == Secrels.end()) + break; + + // Check whether this line table covers Addr in C. + DebugLinesSubsectionRef LinesTmp; + ExitOnErr(LinesTmp.initialize(BinaryStreamReader(Ref))); + uint32_t OffsetInC = I->second + LinesTmp.header()->RelocOffset; + if (Addr < OffsetInC || Addr >= OffsetInC + LinesTmp.header()->CodeSize) + break; + + assert(!Lines.header() && + "Encountered multiple line tables for function!"); + ExitOnErr(Lines.initialize(BinaryStreamReader(Ref))); + OffsetInLinetable = Addr - OffsetInC; + break; + } + default: + break; + } + + if (CVStrTab.valid() && Checksums.valid() && Lines.header()) + return true; + } + } + + return false; +} + +// Use CodeView line tables to resolve a file and line number for the given +// offset into the given chunk and return them, or {"", 0} if a line table was +// not found. +std::pair<StringRef, uint32_t> coff::getFileLine(const SectionChunk *C, + uint32_t Addr) { + ExitOnError ExitOnErr; + + DebugStringTableSubsectionRef CVStrTab; + DebugChecksumsSubsectionRef Checksums; + DebugLinesSubsectionRef Lines; + uint32_t OffsetInLinetable; + + if (!findLineTable(C, Addr, CVStrTab, Checksums, Lines, OffsetInLinetable)) + return {"", 0}; + + uint32_t NameIndex; + uint32_t LineNumber; + for (LineColumnEntry &Entry : Lines) { + for (const LineNumberEntry &LN : Entry.LineNumbers) { + if (LN.Offset > OffsetInLinetable) { + StringRef Filename = + ExitOnErr(getFileName(CVStrTab, Checksums, NameIndex)); + return {Filename, LineNumber}; + } + LineInfo LI(LN.Flags); + NameIndex = Entry.NameIndex; + LineNumber = LI.getStartLine(); + } + } + StringRef Filename = ExitOnErr(getFileName(CVStrTab, Checksums, NameIndex)); + return {Filename, LineNumber}; +} diff --git a/COFF/PDB.h b/COFF/PDB.h index defd7d236790..a98d129a633b 100644 --- a/COFF/PDB.h +++ b/COFF/PDB.h @@ -22,12 +22,16 @@ union DebugInfo; namespace lld { namespace coff { class OutputSection; +class SectionChunk; class SymbolTable; void createPDB(SymbolTable *Symtab, llvm::ArrayRef<OutputSection *> OutputSections, llvm::ArrayRef<uint8_t> SectionTable, const llvm::codeview::DebugInfo &BuildId); + +std::pair<llvm::StringRef, uint32_t> getFileLine(const SectionChunk *C, + uint32_t Addr); } } diff --git a/COFF/Strings.cpp b/COFF/Strings.cpp deleted file mode 100644 index 89b9c5186fd1..000000000000 --- a/COFF/Strings.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===- Strings.cpp -------------------------------------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "Strings.h" -#include <mutex> - -#if defined(_MSC_VER) -#include <Windows.h> -#include <DbgHelp.h> -#pragma comment(lib, "dbghelp.lib") -#endif - -using namespace lld; -using namespace lld::coff; -using namespace llvm; - -Optional<std::string> coff::demangleMSVC(StringRef S) { -#if defined(_MSC_VER) - // UnDecorateSymbolName is not thread-safe, so we need a mutex. - static std::mutex Mu; - std::lock_guard<std::mutex> Lock(Mu); - - char Buf[4096]; - if (S.startswith("?")) - if (size_t Len = UnDecorateSymbolName(S.str().c_str(), Buf, sizeof(Buf), 0)) - return std::string(Buf, Len); -#endif - return None; -} diff --git a/COFF/SymbolTable.cpp b/COFF/SymbolTable.cpp index df76679535cb..b286d865caaf 100644 --- a/COFF/SymbolTable.cpp +++ b/COFF/SymbolTable.cpp @@ -11,9 +11,11 @@ #include "Config.h" #include "Driver.h" #include "LTO.h" +#include "PDB.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Timer.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -24,6 +26,8 @@ using namespace llvm; namespace lld { namespace coff { +static Timer LTOTimer("LTO", Timer::root()); + SymbolTable *Symtab; void SymbolTable::addFile(InputFile *File) { @@ -34,8 +38,9 @@ void SymbolTable::addFile(InputFile *File) { if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { Config->Machine = MT; } else if (MT != IMAGE_FILE_MACHINE_UNKNOWN && Config->Machine != MT) { - fatal(toString(File) + ": machine type " + machineToStr(MT) + + error(toString(File) + ": machine type " + machineToStr(MT) + " conflicts with " + machineToStr(Config->Machine)); + return; } if (auto *F = dyn_cast<ObjFile>(File)) { @@ -61,6 +66,66 @@ static void errorOrWarn(const Twine &S) { error(S); } +// Returns the name of the symbol in SC whose value is <= Addr that is closest +// to Addr. This is generally the name of the global variable or function whose +// definition contains Addr. +static StringRef getSymbolName(SectionChunk *SC, uint32_t Addr) { + DefinedRegular *Candidate = nullptr; + + for (Symbol *S : SC->File->getSymbols()) { + auto *D = dyn_cast_or_null<DefinedRegular>(S); + if (!D || D->getChunk() != SC || D->getValue() > Addr || + (Candidate && D->getValue() < Candidate->getValue())) + continue; + + Candidate = D; + } + + if (!Candidate) + return ""; + return Candidate->getName(); +} + +static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) { + struct Location { + StringRef SymName; + std::pair<StringRef, uint32_t> FileLine; + }; + std::vector<Location> Locations; + + for (Chunk *C : File->getChunks()) { + auto *SC = dyn_cast<SectionChunk>(C); + if (!SC) + continue; + for (const coff_relocation &R : SC->Relocs) { + if (R.SymbolTableIndex != SymIndex) + continue; + std::pair<StringRef, uint32_t> FileLine = + getFileLine(SC, R.VirtualAddress); + StringRef SymName = getSymbolName(SC, R.VirtualAddress); + if (!FileLine.first.empty() || !SymName.empty()) + Locations.push_back({SymName, FileLine}); + } + } + + if (Locations.empty()) + return "\n>>> referenced by " + toString(File) + "\n"; + + std::string Out; + llvm::raw_string_ostream OS(Out); + for (Location Loc : Locations) { + OS << "\n>>> referenced by "; + if (!Loc.FileLine.first.empty()) + OS << Loc.FileLine.first << ":" << Loc.FileLine.second + << "\n>>> "; + OS << toString(File); + if (!Loc.SymName.empty()) + OS << ":(" << Loc.SymName << ')'; + } + OS << '\n'; + return OS.str(); +} + void SymbolTable::reportRemainingUndefines() { SmallPtrSet<Symbol *, 8> Undefs; DenseMap<Symbol *, Symbol *> LocalImports; @@ -120,20 +185,23 @@ void SymbolTable::reportRemainingUndefines() { if (Config->WarnLocallyDefinedImported) if (Symbol *Imp = LocalImports.lookup(B)) warn("<root>: locally defined symbol imported: " + Imp->getName() + - " (defined in " + toString(Imp->getFile()) + ")"); + " (defined in " + toString(Imp->getFile()) + ") [LNK4217]"); } for (ObjFile *File : ObjFile::Instances) { + size_t SymIndex = (size_t)-1; for (Symbol *Sym : File->getSymbols()) { + ++SymIndex; if (!Sym) continue; if (Undefs.count(Sym)) - errorOrWarn(toString(File) + ": undefined symbol: " + Sym->getName()); + errorOrWarn("undefined symbol: " + Sym->getName() + + getSymbolLocations(File, SymIndex)); if (Config->WarnLocallyDefinedImported) if (Symbol *Imp = LocalImports.lookup(Sym)) warn(toString(File) + ": locally defined symbol imported: " + Imp->getName() + " (defined in " + toString(Imp->getFile()) + - ")"); + ") [LNK4217]"); } } } @@ -142,7 +210,7 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { Symbol *&Sym = SymMap[CachedHashStringRef(Name)]; if (Sym) return {Sym, false}; - Sym = (Symbol *)make<SymbolUnion>(); + Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); Sym->IsUsedInRegularObj = false; Sym->PendingArchiveLoad = false; return {Sym, true}; @@ -274,30 +342,29 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size, return S; } -DefinedImportData *SymbolTable::addImportData(StringRef N, ImportFile *F) { +Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(N); S->IsUsedInRegularObj = true; if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { replaceSymbol<DefinedImportData>(S, N, F); - return cast<DefinedImportData>(S); + return S; } reportDuplicate(S, F); return nullptr; } -DefinedImportThunk *SymbolTable::addImportThunk(StringRef Name, - DefinedImportData *ID, - uint16_t Machine) { +Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, + uint16_t Machine) { Symbol *S; bool WasInserted; std::tie(S, WasInserted) = insert(Name); S->IsUsedInRegularObj = true; if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine); - return cast<DefinedImportThunk>(S); + return S; } reportDuplicate(S, ID->File); @@ -314,10 +381,7 @@ std::vector<Chunk *> SymbolTable::getChunks() { } Symbol *SymbolTable::find(StringRef Name) { - auto It = SymMap.find(CachedHashStringRef(Name)); - if (It == SymMap.end()) - return nullptr; - return It->second; + return SymMap.lookup(CachedHashStringRef(Name)); } Symbol *SymbolTable::findUnderscore(StringRef Name) { @@ -384,6 +448,8 @@ std::vector<StringRef> SymbolTable::compileBitcodeFiles() { void SymbolTable::addCombinedLTOObjects() { if (BitcodeFile::Instances.empty()) return; + + ScopedTimer T(LTOTimer); for (StringRef Object : compileBitcodeFiles()) { auto *Obj = make<ObjFile>(MemoryBufferRef(Object, "lto.tmp")); Obj->parse(); diff --git a/COFF/SymbolTable.h b/COFF/SymbolTable.h index 55481e6475bb..30cb1a5410c3 100644 --- a/COFF/SymbolTable.h +++ b/COFF/SymbolTable.h @@ -92,9 +92,9 @@ public: Symbol *addCommon(InputFile *F, StringRef N, uint64_t Size, const llvm::object::coff_symbol_generic *S = nullptr, CommonChunk *C = nullptr); - DefinedImportData *addImportData(StringRef N, ImportFile *F); - DefinedImportThunk *addImportThunk(StringRef Name, DefinedImportData *S, - uint16_t Machine); + Symbol *addImportData(StringRef N, ImportFile *F); + Symbol *addImportThunk(StringRef Name, DefinedImportData *S, + uint16_t Machine); void reportDuplicate(Symbol *Existing, InputFile *NewFile); diff --git a/COFF/Symbols.cpp b/COFF/Symbols.cpp index 4c5ab48c7565..7c8b7d5e8fc5 100644 --- a/COFF/Symbols.cpp +++ b/COFF/Symbols.cpp @@ -9,9 +9,9 @@ #include "Symbols.h" #include "InputFiles.h" -#include "Strings.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -21,7 +21,7 @@ using namespace llvm::object; // Returns a symbol name for an error message. std::string lld::toString(coff::Symbol &B) { - if (Optional<std::string> S = coff::demangleMSVC(B.getName())) + if (Optional<std::string> S = lld::demangleMSVC(B.getName())) return ("\"" + *S + "\" (" + B.getName() + ")").str(); return B.getName(); } @@ -58,7 +58,7 @@ bool Symbol::isLive() const { if (auto *Imp = dyn_cast<DefinedImportData>(this)) return Imp->File->Live; if (auto *Imp = dyn_cast<DefinedImportThunk>(this)) - return Imp->WrappedSym->File->Live; + return Imp->WrappedSym->File->ThunkLive; // Assume any other kind of symbol is live. return true; } @@ -71,7 +71,7 @@ COFFSymbolRef DefinedCOFF::getCOFFSymbol() { return COFFSymbolRef(reinterpret_cast<const coff_symbol32 *>(Sym)); } -uint16_t DefinedAbsolute::OutputSectionIndex = 0; +uint16_t DefinedAbsolute::NumOutputSections; static Chunk *makeImportThunk(DefinedImportData *S, uint16_t Machine) { if (Machine == AMD64) diff --git a/COFF/Symbols.h b/COFF/Symbols.h index d8a030705e27..783965adbd9a 100644 --- a/COFF/Symbols.h +++ b/COFF/Symbols.h @@ -213,11 +213,10 @@ public: uint64_t getRVA() { return VA - Config->ImageBase; } void setVA(uint64_t V) { VA = V; } - // The sentinel absolute symbol section index. Section index relocations - // against absolute symbols resolve to this 16 bit number, and it is the - // largest valid section index plus one. This is written by the Writer. - static uint16_t OutputSectionIndex; - uint16_t getSecIdx() { return OutputSectionIndex; } + // Section index relocations against absolute symbols resolve to + // this 16 bit number, and it is the largest valid section index + // plus one. This variable keeps it. + static uint16_t NumOutputSections; private: uint64_t VA; @@ -416,6 +415,8 @@ union SymbolUnion { template <typename T, typename... ArgT> void replaceSymbol(Symbol *S, ArgT &&... Arg) { + static_assert(std::is_trivially_destructible<T>(), + "Symbol types must be trivially destructible"); static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small"); static_assert(alignof(T) <= alignof(SymbolUnion), "SymbolUnion not aligned enough"); diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp index 584f0621bea3..d17405ec26ab 100644 --- a/COFF/Writer.cpp +++ b/COFF/Writer.cpp @@ -17,6 +17,7 @@ #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Timer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -25,7 +26,9 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Parallel.h" +#include "llvm/Support/Path.h" #include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/xxhash.h" #include <algorithm> #include <cstdio> #include <map> @@ -40,8 +43,40 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::coff; +/* To re-generate DOSProgram: +$ cat > /tmp/DOSProgram.asm +org 0 + ; Copy cs to ds. + push cs + pop ds + ; Point ds:dx at the $-terminated string. + mov dx, str + ; Int 21/AH=09h: Write string to standard output. + mov ah, 0x9 + int 0x21 + ; Int 21/AH=4Ch: Exit with return code (in AL). + mov ax, 0x4C01 + int 0x21 +str: + db 'This program cannot be run in DOS mode.$' +align 8, db 0 +$ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin +$ xxd -i /tmp/DOSProgram.bin +*/ +static unsigned char DOSProgram[] = { + 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c, + 0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, + 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65, + 0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, + 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00 +}; +static_assert(sizeof(DOSProgram) % 8 == 0, + "DOSProgram size must be multiple of 8"); + static const int SectorSize = 512; -static const int DOSStubSize = 64; +static const int DOSStubSize = sizeof(dos_header) + sizeof(DOSProgram); +static_assert(DOSStubSize % 8 == 0, "DOSStub size must be multiple of 8"); + static const int NumberfOfDataDirectory = 16; namespace { @@ -69,24 +104,25 @@ public: uint64_t Offs = OS->getFileOff() + (Record->getRVA() - OS->getRVA()); D->PointerToRawData = Offs; + TimeDateStamps.push_back(&D->TimeDateStamp); ++D; } } + void setTimeDateStamp(uint32_t TimeDateStamp) { + for (support::ulittle32_t *TDS : TimeDateStamps) + *TDS = TimeDateStamp; + } + private: + mutable std::vector<support::ulittle32_t *> TimeDateStamps; const std::vector<Chunk *> &Records; }; class CVDebugRecordChunk : public Chunk { public: - CVDebugRecordChunk() { - PDBAbsPath = Config->PDBPath; - if (!PDBAbsPath.empty()) - llvm::sys::fs::make_absolute(PDBAbsPath); - } - size_t getSize() const override { - return sizeof(codeview::DebugInfo) + PDBAbsPath.size() + 1; + return sizeof(codeview::DebugInfo) + Config->PDBAltPath.size() + 1; } void writeTo(uint8_t *B) const override { @@ -96,12 +132,11 @@ public: // variable sized field (PDB Path) char *P = reinterpret_cast<char *>(B + OutputSectionOff + sizeof(*BuildId)); - if (!PDBAbsPath.empty()) - memcpy(P, PDBAbsPath.data(), PDBAbsPath.size()); - P[PDBAbsPath.size()] = '\0'; + if (!Config->PDBAltPath.empty()) + memcpy(P, Config->PDBAltPath.data(), Config->PDBAltPath.size()); + P[Config->PDBAltPath.size()] = '\0'; } - SmallString<128> PDBAbsPath; mutable codeview::DebugInfo *BuildId = nullptr; }; @@ -116,12 +151,19 @@ private: void createMiscChunks(); void createImportTables(); void createExportTable(); + void mergeSections(); void assignAddresses(); void removeEmptySections(); void createSymbolAndStringTable(); void openFile(StringRef OutputPath); template <typename PEHeaderTy> void writeHeader(); - void createSEHTable(OutputSection *RData); + void createSEHTable(); + void createGuardCFTables(); + void markSymbolsForRVATable(ObjFile *File, + ArrayRef<SectionChunk *> SymIdxChunks, + SymbolRVASet &TableSymbols); + void maybeAddRVATable(SymbolRVASet TableSymbols, StringRef TableSym, + StringRef CountSym); void setSectionPermissions(); void writeSections(); void writeBuildId(); @@ -131,9 +173,8 @@ private: size_t addEntryToStringTable(StringRef Str); OutputSection *findSection(StringRef Name); - OutputSection *createSection(StringRef Name); - void addBaserels(OutputSection *Dest); - void addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V); + void addBaserels(); + void addBaserelBlocks(std::vector<Baserel> &V); uint32_t getSizeOfInitializedData(); std::map<StringRef, std::vector<DefinedImportData *>> binImports(); @@ -145,9 +186,9 @@ private: IdataContents Idata; DelayLoadContents DelayIdata; EdataContents Edata; - SEHTableChunk *SEHTable = nullptr; + bool SetNoSEHCharacteristic = false; - Chunk *DebugDirectory = nullptr; + DebugDirectoryChunk *DebugDirectory = nullptr; std::vector<Chunk *> DebugRecords; CVDebugRecordChunk *BuildId = nullptr; Optional<codeview::DebugInfo> PreviousBuildId; @@ -157,50 +198,55 @@ private: uint32_t PointerToSymbolTable = 0; uint64_t SizeOfImage; uint64_t SizeOfHeaders; + + OutputSection *TextSec; + OutputSection *RdataSec; + OutputSection *BuildidSec; + OutputSection *DataSec; + OutputSection *PdataSec; + OutputSection *IdataSec; + OutputSection *EdataSec; + OutputSection *DidatSec; + OutputSection *RsrcSec; + OutputSection *RelocSec; + + // The first and last .pdata sections in the output file. + // + // We need to keep track of the location of .pdata in whichever section it + // gets merged into so that we can sort its contents and emit a correct data + // directory entry for the exception table. This is also the case for some + // other sections (such as .edata) but because the contents of those sections + // are entirely linker-generated we can keep track of their locations using + // the chunks that the linker creates. All .pdata chunks come from input + // files, so we need to keep track of them separately. + Chunk *FirstPdata = nullptr; + Chunk *LastPdata; }; } // anonymous namespace namespace lld { namespace coff { -void writeResult() { Writer().run(); } - -void OutputSection::setRVA(uint64_t RVA) { - Header.VirtualAddress = RVA; - for (Chunk *C : Chunks) - C->setRVA(C->getRVA() + RVA); -} +static Timer CodeLayoutTimer("Code Layout", Timer::root()); +static Timer DiskCommitTimer("Commit Output File", Timer::root()); -void OutputSection::setFileOffset(uint64_t Off) { - // If a section has no actual data (i.e. BSS section), we want to - // set 0 to its PointerToRawData. Otherwise the output is rejected - // by the loader. - if (Header.SizeOfRawData == 0) - return; - Header.PointerToRawData = Off; -} +void writeResult() { Writer().run(); } void OutputSection::addChunk(Chunk *C) { Chunks.push_back(C); C->setOutputSection(this); - uint64_t Off = Header.VirtualSize; - Off = alignTo(Off, C->Alignment); - C->setRVA(Off); - C->OutputSectionOff = Off; - Off += C->getSize(); - if (Off > UINT32_MAX) - error("section larger than 4 GiB: " + Name); - Header.VirtualSize = Off; - if (C->hasData()) - Header.SizeOfRawData = alignTo(Off, SectorSize); } -void OutputSection::addPermissions(uint32_t C) { - Header.Characteristics |= C & PermMask; +void OutputSection::setPermissions(uint32_t C) { + Header.Characteristics &= ~PermMask; + Header.Characteristics |= C; } -void OutputSection::setPermissions(uint32_t C) { - Header.Characteristics = C & PermMask; +void OutputSection::merge(OutputSection *Other) { + for (Chunk *C : Other->Chunks) + C->setOutputSection(this); + Chunks.insert(Chunks.end(), Other->Chunks.begin(), Other->Chunks.end()); + Other->Chunks.clear(); } // Write the section header to a given buffer. @@ -284,17 +330,22 @@ static Optional<codeview::DebugInfo> loadExistingBuildId(StringRef Path) { // The main function of the writer. void Writer::run() { + ScopedTimer T1(CodeLayoutTimer); + createSections(); createMiscChunks(); createImportTables(); createExportTable(); - if (Config->Relocatable) - createSection(".reloc"); + mergeSections(); assignAddresses(); removeEmptySections(); setSectionPermissions(); createSymbolAndStringTable(); + if (FileSize > UINT32_MAX) + fatal("image size (" + Twine(FileSize) + ") " + + "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")"); + // We must do this before opening the output file, as it depends on being able // to read the contents of the existing output file. PreviousBuildId = loadExistingBuildId(Config->OutputFile); @@ -308,35 +359,79 @@ void Writer::run() { sortExceptionTable(); writeBuildId(); - if (!Config->PDBPath.empty() && Config->Debug) { + T1.stop(); + if (!Config->PDBPath.empty() && Config->Debug) { assert(BuildId); createPDB(Symtab, OutputSections, SectionTable, *BuildId->BuildId); } writeMapFile(OutputSections); + ScopedTimer T2(DiskCommitTimer); if (auto E = Buffer->commit()) fatal("failed to write the output file: " + toString(std::move(E))); } -static StringRef getOutputSection(StringRef Name) { +static StringRef getOutputSectionName(StringRef Name) { StringRef S = Name.split('$').first; // Treat a later period as a separator for MinGW, for sections like // ".ctors.01234". - S = S.substr(0, S.find('.', 1)); + return S.substr(0, S.find('.', 1)); +} - auto It = Config->Merge.find(S); - if (It == Config->Merge.end()) - return S; - return It->second; +// For /order. +static void sortBySectionOrder(std::vector<Chunk *> &Chunks) { + auto GetPriority = [](const Chunk *C) { + if (auto *Sec = dyn_cast<SectionChunk>(C)) + if (Sec->Sym) + return Config->Order.lookup(Sec->Sym->getName()); + return 0; + }; + + std::stable_sort(Chunks.begin(), Chunks.end(), + [=](const Chunk *A, const Chunk *B) { + return GetPriority(A) < GetPriority(B); + }); } // Create output section objects and add them to OutputSections. void Writer::createSections() { - // First, bin chunks by name. - std::map<StringRef, std::vector<Chunk *>> Map; + // First, create the builtin sections. + const uint32_t DATA = IMAGE_SCN_CNT_INITIALIZED_DATA; + const uint32_t BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA; + const uint32_t CODE = IMAGE_SCN_CNT_CODE; + const uint32_t DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE; + const uint32_t R = IMAGE_SCN_MEM_READ; + const uint32_t W = IMAGE_SCN_MEM_WRITE; + const uint32_t X = IMAGE_SCN_MEM_EXECUTE; + + SmallDenseMap<std::pair<StringRef, uint32_t>, OutputSection *> Sections; + auto CreateSection = [&](StringRef Name, uint32_t OutChars) { + OutputSection *&Sec = Sections[{Name, OutChars}]; + if (!Sec) { + Sec = make<OutputSection>(Name, OutChars); + OutputSections.push_back(Sec); + } + return Sec; + }; + + // Try to match the section order used by link.exe. + TextSec = CreateSection(".text", CODE | R | X); + CreateSection(".bss", BSS | R | W); + RdataSec = CreateSection(".rdata", DATA | R); + BuildidSec = CreateSection(".buildid", DATA | R); + DataSec = CreateSection(".data", DATA | R | W); + PdataSec = CreateSection(".pdata", DATA | R); + IdataSec = CreateSection(".idata", DATA | R); + EdataSec = CreateSection(".edata", DATA | R); + DidatSec = CreateSection(".didat", DATA | R); + RsrcSec = CreateSection(".rsrc", DATA | R); + RelocSec = CreateSection(".reloc", DATA | DISCARDABLE | R); + + // Then bin chunks by name and output characteristics. + std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> Map; for (Chunk *C : Symtab->getChunks()) { auto *SC = dyn_cast<SectionChunk>(C); if (SC && !SC->isLive()) { @@ -344,42 +439,71 @@ void Writer::createSections() { SC->printDiscardedMessage(); continue; } - Map[C->getSectionName()].push_back(C); + Map[{C->getSectionName(), C->getOutputCharacteristics()}].push_back(C); } + // Process an /order option. + if (!Config->Order.empty()) + for (auto &Pair : Map) + sortBySectionOrder(Pair.second); + // Then create an OutputSection for each section. // '$' and all following characters in input section names are // discarded when determining output section. So, .text$foo // contributes to .text, for example. See PE/COFF spec 3.2. - SmallDenseMap<StringRef, OutputSection *> Sections; for (auto Pair : Map) { - StringRef Name = getOutputSection(Pair.first); - OutputSection *&Sec = Sections[Name]; - if (!Sec) { - Sec = make<OutputSection>(Name); - OutputSections.push_back(Sec); - } + StringRef Name = getOutputSectionName(Pair.first.first); + uint32_t OutChars = Pair.first.second; + + // In link.exe, there is a special case for the I386 target where .CRT + // sections are treated as if they have output characteristics DATA | R if + // their characteristics are DATA | R | W. This implements the same special + // case for all architectures. + if (Name == ".CRT") + OutChars = DATA | R; + + OutputSection *Sec = CreateSection(Name, OutChars); std::vector<Chunk *> &Chunks = Pair.second; - for (Chunk *C : Chunks) { + for (Chunk *C : Chunks) Sec->addChunk(C); - Sec->addPermissions(C->getPermissions()); - } } + + // Finally, move some output sections to the end. + auto SectionOrder = [&](OutputSection *S) { + // Move DISCARDABLE (or non-memory-mapped) sections to the end of file because + // the loader cannot handle holes. Stripping can remove other discardable ones + // than .reloc, which is first of them (created early). + if (S->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) + return 2; + // .rsrc should come at the end of the non-discardable sections because its + // size may change by the Win32 UpdateResources() function, causing + // subsequent sections to move (see https://crbug.com/827082). + if (S == RsrcSec) + return 1; + return 0; + }; + std::stable_sort(OutputSections.begin(), OutputSections.end(), + [&](OutputSection *S, OutputSection *T) { + return SectionOrder(S) < SectionOrder(T); + }); } void Writer::createMiscChunks() { - OutputSection *RData = createSection(".rdata"); + for (auto &P : MergeChunk::Instances) + RdataSec->addChunk(P.second); // Create thunks for locally-dllimported symbols. if (!Symtab->LocalImportChunks.empty()) { for (Chunk *C : Symtab->LocalImportChunks) - RData->addChunk(C); + RdataSec->addChunk(C); } // Create Debug Information Chunks if (Config->Debug) { DebugDirectory = make<DebugDirectoryChunk>(DebugRecords); + OutputSection *DebugInfoSec = Config->MinGW ? BuildidSec : RdataSec; + // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We // output a PDB no matter what, and this chunk provides the only means of // allowing a debugger to match a PDB and an executable. So we need it even @@ -388,12 +512,18 @@ void Writer::createMiscChunks() { BuildId = CVChunk; DebugRecords.push_back(CVChunk); - RData->addChunk(DebugDirectory); + DebugInfoSec->addChunk(DebugDirectory); for (Chunk *C : DebugRecords) - RData->addChunk(C); + DebugInfoSec->addChunk(C); } - createSEHTable(RData); + // Create SEH table. x86-only. + if (Config->Machine == I386) + createSEHTable(); + + // Create /guard:cf tables if requested. + if (Config->GuardCF != GuardCFLevel::Off) + createGuardCFTables(); } // Create .idata section for the DLL-imported symbol table. @@ -414,53 +544,49 @@ void Writer::createImportTables() { std::string DLL = StringRef(File->DLLName).lower(); if (Config->DLLOrder.count(DLL) == 0) Config->DLLOrder[DLL] = Config->DLLOrder.size(); - } - OutputSection *Text = createSection(".text"); - for (ImportFile *File : ImportFile::Instances) { - if (!File->Live) - continue; - - if (DefinedImportThunk *Thunk = File->ThunkSym) - Text->addChunk(Thunk->getChunk()); + if (File->ThunkSym) { + if (!isa<DefinedImportThunk>(File->ThunkSym)) + fatal(toString(*File->ThunkSym) + " was replaced"); + DefinedImportThunk *Thunk = cast<DefinedImportThunk>(File->ThunkSym); + if (File->ThunkLive) + TextSec->addChunk(Thunk->getChunk()); + } + if (File->ImpSym && !isa<DefinedImportData>(File->ImpSym)) + fatal(toString(*File->ImpSym) + " was replaced"); + DefinedImportData *ImpSym = cast_or_null<DefinedImportData>(File->ImpSym); if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) { if (!File->ThunkSym) fatal("cannot delay-load " + toString(File) + - " due to import of data: " + toString(*File->ImpSym)); - DelayIdata.add(File->ImpSym); + " due to import of data: " + toString(*ImpSym)); + DelayIdata.add(ImpSym); } else { - Idata.add(File->ImpSym); + Idata.add(ImpSym); } } - if (!Idata.empty()) { - OutputSection *Sec = createSection(".idata"); + if (!Idata.empty()) for (Chunk *C : Idata.getChunks()) - Sec->addChunk(C); - } + IdataSec->addChunk(C); if (!DelayIdata.empty()) { Defined *Helper = cast<Defined>(Config->DelayLoadHelper); DelayIdata.create(Helper); - OutputSection *Sec = createSection(".didat"); for (Chunk *C : DelayIdata.getChunks()) - Sec->addChunk(C); - Sec = createSection(".data"); + DidatSec->addChunk(C); for (Chunk *C : DelayIdata.getDataChunks()) - Sec->addChunk(C); - Sec = createSection(".text"); + DataSec->addChunk(C); for (Chunk *C : DelayIdata.getCodeChunks()) - Sec->addChunk(C); + TextSec->addChunk(C); } } void Writer::createExportTable() { if (Config->Exports.empty()) return; - OutputSection *Sec = createSection(".edata"); for (Chunk *C : Edata.Chunks) - Sec->addChunk(C); + EdataSec->addChunk(C); } // The Windows loader doesn't seem to like empty sections, @@ -484,19 +610,31 @@ size_t Writer::addEntryToStringTable(StringRef Str) { } Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { - // Relative symbols are unrepresentable in a COFF symbol table. - if (isa<DefinedSynthetic>(Def)) - return None; - - // Don't write dead symbols or symbols in codeview sections to the symbol - // table. - if (!Def->isLive()) + coff_symbol16 Sym; + switch (Def->kind()) { + case Symbol::DefinedAbsoluteKind: + Sym.Value = Def->getRVA(); + Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; + break; + case Symbol::DefinedSyntheticKind: + // Relative symbols are unrepresentable in a COFF symbol table. return None; - if (auto *D = dyn_cast<DefinedRegular>(Def)) - if (D->getChunk()->isCodeView()) + default: { + // Don't write symbols that won't be written to the output to the symbol + // table. + Chunk *C = Def->getChunk(); + if (!C) + return None; + OutputSection *OS = C->getOutputSection(); + if (!OS) return None; - coff_symbol16 Sym; + Sym.Value = Def->getRVA() - OS->getRVA(); + Sym.SectionNumber = OS->SectionIndex; + break; + } + } + StringRef Name = Def->getName(); if (Name.size() > COFF::NameSize) { Sym.Name.Offset.Zeroes = 0; @@ -515,46 +653,27 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *Def) { Sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; } Sym.NumberOfAuxSymbols = 0; - - switch (Def->kind()) { - case Symbol::DefinedAbsoluteKind: - Sym.Value = Def->getRVA(); - Sym.SectionNumber = IMAGE_SYM_ABSOLUTE; - break; - default: { - uint64_t RVA = Def->getRVA(); - OutputSection *Sec = nullptr; - for (OutputSection *S : OutputSections) { - if (S->getRVA() > RVA) - break; - Sec = S; - } - Sym.Value = RVA - Sec->getRVA(); - Sym.SectionNumber = Sec->SectionIndex; - break; - } - } return Sym; } void Writer::createSymbolAndStringTable() { - // Name field in the section table is 8 byte long. Longer names need - // to be written to the string table. First, construct string table. + // PE/COFF images are limited to 8 byte section names. Longer names can be + // supported by writing a non-standard string table, but this string table is + // not mapped at runtime and the long names will therefore be inaccessible. + // link.exe always truncates section names to 8 bytes, whereas binutils always + // preserves long section names via the string table. LLD adopts a hybrid + // solution where discardable sections have long names preserved and + // non-discardable sections have their names truncated, to ensure that any + // section which is mapped at runtime also has its name mapped at runtime. for (OutputSection *Sec : OutputSections) { - StringRef Name = Sec->getName(); - if (Name.size() <= COFF::NameSize) + if (Sec->Name.size() <= COFF::NameSize) continue; - // If a section isn't discardable (i.e. will be mapped at runtime), - // prefer a truncated section name over a long section name in - // the string table that is unavailable at runtime. This is different from - // what link.exe does, but finding ".eh_fram" instead of "/4" is useful - // to libunwind. - if ((Sec->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0) + if ((Sec->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0) continue; - Sec->setStringTableOff(addEntryToStringTable(Name)); + Sec->setStringTableOff(addEntryToStringTable(Sec->Name)); } - if (Config->DebugDwarf) { + if (Config->DebugDwarf || Config->DebugSymtab) { for (ObjFile *File : ObjFile::Instances) { for (Symbol *B : File->getSymbols()) { auto *D = dyn_cast_or_null<Defined>(B); @@ -571,16 +690,45 @@ void Writer::createSymbolAndStringTable() { if (OutputSymtab.empty() && Strtab.empty()) return; - OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. - uint64_t FileOff = LastSection->getFileOff() + - alignTo(LastSection->getRawSize(), SectorSize); + uint64_t FileOff = FileSize; PointerToSymbolTable = FileOff; FileOff += OutputSymtab.size() * sizeof(coff_symbol16); FileOff += 4 + Strtab.size(); FileSize = alignTo(FileOff, SectorSize); } +void Writer::mergeSections() { + if (!PdataSec->getChunks().empty()) { + FirstPdata = PdataSec->getChunks().front(); + LastPdata = PdataSec->getChunks().back(); + } + + for (auto &P : Config->Merge) { + StringRef ToName = P.second; + if (P.first == ToName) + continue; + StringSet<> Names; + while (1) { + if (!Names.insert(ToName).second) + fatal("/merge: cycle found for section '" + P.first + "'"); + auto I = Config->Merge.find(ToName); + if (I == Config->Merge.end()) + break; + ToName = I->second; + } + OutputSection *From = findSection(P.first); + OutputSection *To = findSection(ToName); + if (!From) + continue; + if (!To) { + From->Name = ToName; + continue; + } + To->merge(From); + } +} + // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { @@ -590,35 +738,57 @@ void Writer::assignAddresses() { SizeOfHeaders += Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize); - uint64_t RVA = 0x1000; // The first page is kept unmapped. + uint64_t RVA = PageSize; // The first page is kept unmapped. FileSize = SizeOfHeaders; - // Move DISCARDABLE (or non-memory-mapped) sections to the end of file because - // the loader cannot handle holes. - std::stable_partition( - OutputSections.begin(), OutputSections.end(), [](OutputSection *S) { - return (S->getPermissions() & IMAGE_SCN_MEM_DISCARDABLE) == 0; - }); + for (OutputSection *Sec : OutputSections) { - if (Sec->getName() == ".reloc") - addBaserels(Sec); - Sec->setRVA(RVA); - Sec->setFileOffset(FileSize); - RVA += alignTo(Sec->getVirtualSize(), PageSize); - FileSize += alignTo(Sec->getRawSize(), SectorSize); + if (Sec == RelocSec) + addBaserels(); + uint64_t RawSize = 0, VirtualSize = 0; + Sec->Header.VirtualAddress = RVA; + for (Chunk *C : Sec->getChunks()) { + VirtualSize = alignTo(VirtualSize, C->Alignment); + C->setRVA(RVA + VirtualSize); + C->OutputSectionOff = VirtualSize; + C->finalizeContents(); + VirtualSize += C->getSize(); + if (C->hasData()) + RawSize = alignTo(VirtualSize, SectorSize); + } + if (VirtualSize > UINT32_MAX) + error("section larger than 4 GiB: " + Sec->Name); + Sec->Header.VirtualSize = VirtualSize; + Sec->Header.SizeOfRawData = RawSize; + if (RawSize != 0) + Sec->Header.PointerToRawData = FileSize; + RVA += alignTo(VirtualSize, PageSize); + FileSize += alignTo(RawSize, SectorSize); } SizeOfImage = alignTo(RVA, PageSize); } template <typename PEHeaderTy> void Writer::writeHeader() { - // Write DOS stub + // Write DOS header. For backwards compatibility, the first part of a PE/COFF + // executable consists of an MS-DOS MZ executable. If the executable is run + // under DOS, that program gets run (usually to just print an error message). + // When run under Windows, the loader looks at AddressOfNewExeHeader and uses + // the PE header instead. uint8_t *Buf = Buffer->getBufferStart(); auto *DOS = reinterpret_cast<dos_header *>(Buf); - Buf += DOSStubSize; + Buf += sizeof(dos_header); DOS->Magic[0] = 'M'; DOS->Magic[1] = 'Z'; + DOS->UsedBytesInTheLastPage = DOSStubSize % 512; + DOS->FileSizeInPages = divideCeil(DOSStubSize, 512); + DOS->HeaderSizeInParagraphs = sizeof(dos_header) / 16; + DOS->AddressOfRelocationTable = sizeof(dos_header); DOS->AddressOfNewExeHeader = DOSStubSize; + // Write DOS program. + memcpy(Buf, DOSProgram, sizeof(DOSProgram)); + Buf += sizeof(DOSProgram); + // Write PE magic memcpy(Buf, PEMagic, sizeof(PEMagic)); Buf += sizeof(PEMagic); @@ -688,24 +858,27 @@ template <typename PEHeaderTy> void Writer::writeHeader() { PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; if (!Config->AllowIsolation) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; - if (Config->Machine == I386 && !SEHTable && - !Symtab->findUnderscore("_load_config_used")) + if (Config->GuardCF != GuardCFLevel::Off) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF; + if (Config->IntegrityCheck) + PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY; + if (SetNoSEHCharacteristic) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH; if (Config->TerminalServerAware) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; PE->NumberOfRvaAndSize = NumberfOfDataDirectory; - if (OutputSection *Text = findSection(".text")) { - PE->BaseOfCode = Text->getRVA(); - PE->SizeOfCode = Text->getRawSize(); + if (TextSec->getVirtualSize()) { + PE->BaseOfCode = TextSec->getRVA(); + PE->SizeOfCode = TextSec->getRawSize(); } PE->SizeOfInitializedData = getSizeOfInitializedData(); // Write data directory auto *Dir = reinterpret_cast<data_directory *>(Buf); Buf += sizeof(*Dir) * NumberfOfDataDirectory; - if (OutputSection *Sec = findSection(".edata")) { - Dir[EXPORT_TABLE].RelativeVirtualAddress = Sec->getRVA(); - Dir[EXPORT_TABLE].Size = Sec->getVirtualSize(); + if (!Config->Exports.empty()) { + Dir[EXPORT_TABLE].RelativeVirtualAddress = Edata.getRVA(); + Dir[EXPORT_TABLE].Size = Edata.getSize(); } if (!Idata.empty()) { Dir[IMPORT_TABLE].RelativeVirtualAddress = Idata.getDirRVA(); @@ -713,17 +886,18 @@ template <typename PEHeaderTy> void Writer::writeHeader() { Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA(); Dir[IAT].Size = Idata.getIATSize(); } - if (OutputSection *Sec = findSection(".rsrc")) { - Dir[RESOURCE_TABLE].RelativeVirtualAddress = Sec->getRVA(); - Dir[RESOURCE_TABLE].Size = Sec->getVirtualSize(); + if (RsrcSec->getVirtualSize()) { + Dir[RESOURCE_TABLE].RelativeVirtualAddress = RsrcSec->getRVA(); + Dir[RESOURCE_TABLE].Size = RsrcSec->getVirtualSize(); } - if (OutputSection *Sec = findSection(".pdata")) { - Dir[EXCEPTION_TABLE].RelativeVirtualAddress = Sec->getRVA(); - Dir[EXCEPTION_TABLE].Size = Sec->getVirtualSize(); + if (FirstPdata) { + Dir[EXCEPTION_TABLE].RelativeVirtualAddress = FirstPdata->getRVA(); + Dir[EXCEPTION_TABLE].Size = + LastPdata->getRVA() + LastPdata->getSize() - FirstPdata->getRVA(); } - if (OutputSection *Sec = findSection(".reloc")) { - Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = Sec->getRVA(); - Dir[BASE_RELOCATION_TABLE].Size = Sec->getVirtualSize(); + if (RelocSec->getVirtualSize()) { + Dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = RelocSec->getRVA(); + Dir[BASE_RELOCATION_TABLE].Size = RelocSec->getVirtualSize(); } if (Symbol *Sym = Symtab->findUnderscore("_tls_used")) { if (Defined *B = dyn_cast<Defined>(Sym)) { @@ -792,35 +966,172 @@ void Writer::openFile(StringRef Path) { "failed to open " + Path); } -void Writer::createSEHTable(OutputSection *RData) { - // Create SEH table. x86-only. - if (Config->Machine != I386) - return; - - std::set<Defined *> Handlers; +void Writer::createSEHTable() { + // Set the no SEH characteristic on x86 binaries unless we find exception + // handlers. + SetNoSEHCharacteristic = true; + SymbolRVASet Handlers; for (ObjFile *File : ObjFile::Instances) { - if (!File->SEHCompat) + // FIXME: We should error here instead of earlier unless /safeseh:no was + // passed. + if (!File->hasSafeSEH()) return; - for (uint32_t I : File->SXData) - if (Symbol *B = File->getSymbol(I)) - if (B->isLive()) - Handlers.insert(cast<Defined>(B)); + + markSymbolsForRVATable(File, File->getSXDataChunks(), Handlers); + } + + // Remove the "no SEH" characteristic if all object files were built with + // safeseh, we found some exception handlers, and there is a load config in + // the object. + SetNoSEHCharacteristic = + Handlers.empty() || !Symtab->findUnderscore("_load_config_used"); + + maybeAddRVATable(std::move(Handlers), "__safe_se_handler_table", + "__safe_se_handler_count"); +} + +// Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set +// cannot contain duplicates. Therefore, the set is uniqued by Chunk and the +// symbol's offset into that Chunk. +static void addSymbolToRVASet(SymbolRVASet &RVASet, Defined *S) { + Chunk *C = S->getChunk(); + if (auto *SC = dyn_cast<SectionChunk>(C)) + C = SC->Repl; // Look through ICF replacement. + uint32_t Off = S->getRVA() - (C ? C->getRVA() : 0); + RVASet.insert({C, Off}); +} + +// Visit all relocations from all section contributions of this object file and +// mark the relocation target as address-taken. +static void markSymbolsWithRelocations(ObjFile *File, + SymbolRVASet &UsedSymbols) { + for (Chunk *C : File->getChunks()) { + // We only care about live section chunks. Common chunks and other chunks + // don't generally contain relocations. + SectionChunk *SC = dyn_cast<SectionChunk>(C); + if (!SC || !SC->isLive()) + continue; + + // Look for relocations in this section against symbols in executable output + // sections. + for (Symbol *Ref : SC->symbols()) { + // FIXME: Do further testing to see if the relocation type matters, + // especially for 32-bit where taking the address of something usually + // uses an absolute relocation instead of a relative one. + if (auto *D = dyn_cast_or_null<Defined>(Ref)) { + Chunk *RefChunk = D->getChunk(); + OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr; + if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE) + addSymbolToRVASet(UsedSymbols, D); + } + } + } +} + +// Create the guard function id table. This is a table of RVAs of all +// address-taken functions. It is sorted and uniqued, just like the safe SEH +// table. +void Writer::createGuardCFTables() { + SymbolRVASet AddressTakenSyms; + SymbolRVASet LongJmpTargets; + for (ObjFile *File : ObjFile::Instances) { + // If the object was compiled with /guard:cf, the address taken symbols + // are in .gfids$y sections, and the longjmp targets are in .gljmp$y + // sections. If the object was not compiled with /guard:cf, we assume there + // were no setjmp targets, and that all code symbols with relocations are + // possibly address-taken. + if (File->hasGuardCF()) { + markSymbolsForRVATable(File, File->getGuardFidChunks(), AddressTakenSyms); + markSymbolsForRVATable(File, File->getGuardLJmpChunks(), LongJmpTargets); + } else { + markSymbolsWithRelocations(File, AddressTakenSyms); + } + } + + // Mark the image entry as address-taken. + if (Config->Entry) + addSymbolToRVASet(AddressTakenSyms, cast<Defined>(Config->Entry)); + + // Ensure sections referenced in the gfid table are 16-byte aligned. + for (const ChunkAndOffset &C : AddressTakenSyms) + if (C.InputChunk->Alignment < 16) + C.InputChunk->Alignment = 16; + + maybeAddRVATable(std::move(AddressTakenSyms), "__guard_fids_table", + "__guard_fids_count"); + + // Add the longjmp target table unless the user told us not to. + if (Config->GuardCF == GuardCFLevel::Full) + maybeAddRVATable(std::move(LongJmpTargets), "__guard_longjmp_table", + "__guard_longjmp_count"); + + // Set __guard_flags, which will be used in the load config to indicate that + // /guard:cf was enabled. + uint32_t GuardFlags = uint32_t(coff_guard_flags::CFInstrumented) | + uint32_t(coff_guard_flags::HasFidTable); + if (Config->GuardCF == GuardCFLevel::Full) + GuardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable); + Symbol *FlagSym = Symtab->findUnderscore("__guard_flags"); + cast<DefinedAbsolute>(FlagSym)->setVA(GuardFlags); +} + +// Take a list of input sections containing symbol table indices and add those +// symbols to an RVA table. The challenge is that symbol RVAs are not known and +// depend on the table size, so we can't directly build a set of integers. +void Writer::markSymbolsForRVATable(ObjFile *File, + ArrayRef<SectionChunk *> SymIdxChunks, + SymbolRVASet &TableSymbols) { + for (SectionChunk *C : SymIdxChunks) { + // Skip sections discarded by linker GC. This comes up when a .gfids section + // is associated with something like a vtable and the vtable is discarded. + // In this case, the associated gfids section is discarded, and we don't + // mark the virtual member functions as address-taken by the vtable. + if (!C->isLive()) + continue; + + // Validate that the contents look like symbol table indices. + ArrayRef<uint8_t> Data = C->getContents(); + if (Data.size() % 4 != 0) { + warn("ignoring " + C->getSectionName() + + " symbol table index section in object " + toString(File)); + continue; + } + + // Read each symbol table index and check if that symbol was included in the + // final link. If so, add it to the table symbol set. + ArrayRef<ulittle32_t> SymIndices( + reinterpret_cast<const ulittle32_t *>(Data.data()), Data.size() / 4); + ArrayRef<Symbol *> ObjSymbols = File->getSymbols(); + for (uint32_t SymIndex : SymIndices) { + if (SymIndex >= ObjSymbols.size()) { + warn("ignoring invalid symbol table index in section " + + C->getSectionName() + " in object " + toString(File)); + continue; + } + if (Symbol *S = ObjSymbols[SymIndex]) { + if (S->isLive()) + addSymbolToRVASet(TableSymbols, cast<Defined>(S)); + } + } } +} - if (Handlers.empty()) +// Replace the absolute table symbol with a synthetic symbol pointing to +// TableChunk so that we can emit base relocations for it and resolve section +// relative relocations. +void Writer::maybeAddRVATable(SymbolRVASet TableSymbols, StringRef TableSym, + StringRef CountSym) { + if (TableSymbols.empty()) return; - SEHTable = make<SEHTableChunk>(Handlers); - RData->addChunk(SEHTable); + RVATableChunk *TableChunk = make<RVATableChunk>(std::move(TableSymbols)); + RdataSec->addChunk(TableChunk); - // Replace the absolute table symbol with a synthetic symbol pointing to the - // SEHTable chunk so that we can emit base relocations for it and resolve - // section relative relocations. - Symbol *T = Symtab->find("___safe_se_handler_table"); - Symbol *C = Symtab->find("___safe_se_handler_count"); - replaceSymbol<DefinedSynthetic>(T, T->getName(), SEHTable); - cast<DefinedAbsolute>(C)->setVA(SEHTable->getSize() / 4); + Symbol *T = Symtab->findUnderscore(TableSym); + Symbol *C = Symtab->findUnderscore(CountSym); + replaceSymbol<DefinedSynthetic>(T, T->getName(), TableChunk); + cast<DefinedAbsolute>(C)->setVA(TableChunk->getSize() / 4); } // Handles /section options to allow users to overwrite @@ -829,16 +1140,17 @@ void Writer::setSectionPermissions() { for (auto &P : Config->Section) { StringRef Name = P.first; uint32_t Perm = P.second; - if (auto *Sec = findSection(Name)) - Sec->setPermissions(Perm); + for (OutputSection *Sec : OutputSections) + if (Sec->Name == Name) + Sec->setPermissions(Perm); } } // Write section contents to a mmap'ed file. void Writer::writeSections() { - // Record the section index that should be used when resolving a section - // relocation against an absolute symbol. - DefinedAbsolute::OutputSectionIndex = OutputSections.size() + 1; + // Record the number of sections to apply section index relocations + // against absolute symbols. See applySecIdx in Chunks.cpp.. + DefinedAbsolute::NumOutputSections = OutputSections.size(); uint8_t *Buf = Buffer->getBufferStart(); for (OutputSection *Sec : OutputSections) { @@ -846,7 +1158,7 @@ void Writer::writeSections() { // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as // ADD instructions). - if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE) + if (Sec->Header.Characteristics & IMAGE_SCN_CNT_CODE) memset(SecBuf, 0xCC, Sec->getRawSize()); for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(), [&](Chunk *C) { C->writeTo(SecBuf); }); @@ -854,32 +1166,65 @@ void Writer::writeSections() { } void Writer::writeBuildId() { - // If we're not writing a build id (e.g. because /debug is not specified), - // then just return; - if (!Config->Debug) - return; + // There are two important parts to the build ID. + // 1) If building with debug info, the COFF debug directory contains a + // timestamp as well as a Guid and Age of the PDB. + // 2) In all cases, the PE COFF file header also contains a timestamp. + // For reproducibility, instead of a timestamp we want to use a hash of the + // binary, however when building with debug info the hash needs to take into + // account the debug info, since it's possible to add blank lines to a file + // which causes the debug info to change but not the generated code. + // + // To handle this, we first set the Guid and Age in the debug directory (but + // only if we're doing a debug build). Then, we hash the binary (thus causing + // the hash to change if only the debug info changes, since the Age will be + // different). Finally, we write that hash into the debug directory (if + // present) as well as the COFF file header (always). + if (Config->Debug) { + assert(BuildId && "BuildId is not set!"); + if (PreviousBuildId.hasValue()) { + *BuildId->BuildId = *PreviousBuildId; + BuildId->BuildId->PDB70.Age = BuildId->BuildId->PDB70.Age + 1; + } else { + BuildId->BuildId->Signature.CVSignature = OMF::Signature::PDB70; + BuildId->BuildId->PDB70.Age = 1; + llvm::getRandomBytes(BuildId->BuildId->PDB70.Signature, 16); + } + } - assert(BuildId && "BuildId is not set!"); + // At this point the only fields in the COFF file which remain unset are the + // "timestamp" in the COFF file header, and the ones in the coff debug + // directory. Now we can hash the file and write that hash to the various + // timestamp fields in the file. + StringRef OutputFileData( + reinterpret_cast<const char *>(Buffer->getBufferStart()), + Buffer->getBufferSize()); - if (PreviousBuildId.hasValue()) { - *BuildId->BuildId = *PreviousBuildId; - BuildId->BuildId->PDB70.Age = BuildId->BuildId->PDB70.Age + 1; - return; - } + uint32_t Timestamp = Config->Timestamp; + if (Config->Repro) + Timestamp = static_cast<uint32_t>(xxHash64(OutputFileData)); + + if (DebugDirectory) + DebugDirectory->setTimeDateStamp(Timestamp); - BuildId->BuildId->Signature.CVSignature = OMF::Signature::PDB70; - BuildId->BuildId->PDB70.Age = 1; - llvm::getRandomBytes(BuildId->BuildId->PDB70.Signature, 16); + uint8_t *Buf = Buffer->getBufferStart(); + Buf += DOSStubSize + sizeof(PEMagic); + object::coff_file_header *CoffHeader = + reinterpret_cast<coff_file_header *>(Buf); + CoffHeader->TimeDateStamp = Timestamp; } // Sort .pdata section contents according to PE/COFF spec 5.5. void Writer::sortExceptionTable() { - OutputSection *Sec = findSection(".pdata"); - if (!Sec) + if (!FirstPdata) return; // We assume .pdata contains function table entries only. - uint8_t *Begin = Buffer->getBufferStart() + Sec->getFileOff(); - uint8_t *End = Begin + Sec->getVirtualSize(); + auto BufAddr = [&](Chunk *C) { + return Buffer->getBufferStart() + C->getOutputSection()->getFileOff() + + C->getRVA() - C->getOutputSection()->getRVA(); + }; + uint8_t *Begin = BufAddr(FirstPdata); + uint8_t *End = BufAddr(LastPdata) + LastPdata->getSize(); if (Config->Machine == AMD64) { struct Entry { ulittle32_t Begin, End, Unwind; }; sort(parallel::par, (Entry *)Begin, (Entry *)End, @@ -897,7 +1242,7 @@ void Writer::sortExceptionTable() { OutputSection *Writer::findSection(StringRef Name) { for (OutputSection *Sec : OutputSections) - if (Sec->getName() == Name) + if (Sec->Name == Name) return Sec; return nullptr; } @@ -905,55 +1250,31 @@ OutputSection *Writer::findSection(StringRef Name) { uint32_t Writer::getSizeOfInitializedData() { uint32_t Res = 0; for (OutputSection *S : OutputSections) - if (S->getPermissions() & IMAGE_SCN_CNT_INITIALIZED_DATA) + if (S->Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) Res += S->getRawSize(); return Res; } -// Returns an existing section or create a new one if not found. -OutputSection *Writer::createSection(StringRef Name) { - if (auto *Sec = findSection(Name)) - return Sec; - const auto DATA = IMAGE_SCN_CNT_INITIALIZED_DATA; - const auto BSS = IMAGE_SCN_CNT_UNINITIALIZED_DATA; - const auto CODE = IMAGE_SCN_CNT_CODE; - const auto DISCARDABLE = IMAGE_SCN_MEM_DISCARDABLE; - const auto R = IMAGE_SCN_MEM_READ; - const auto W = IMAGE_SCN_MEM_WRITE; - const auto X = IMAGE_SCN_MEM_EXECUTE; - uint32_t Perms = StringSwitch<uint32_t>(Name) - .Case(".bss", BSS | R | W) - .Case(".data", DATA | R | W) - .Cases(".didat", ".edata", ".idata", ".rdata", DATA | R) - .Case(".reloc", DATA | DISCARDABLE | R) - .Case(".text", CODE | R | X) - .Default(0); - if (!Perms) - llvm_unreachable("unknown section name"); - auto Sec = make<OutputSection>(Name); - Sec->addPermissions(Perms); - OutputSections.push_back(Sec); - return Sec; -} - -// Dest is .reloc section. Add contents to that section. -void Writer::addBaserels(OutputSection *Dest) { +// Add base relocations to .reloc section. +void Writer::addBaserels() { + if (!Config->Relocatable) + return; std::vector<Baserel> V; for (OutputSection *Sec : OutputSections) { - if (Sec == Dest) + if (Sec->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) continue; // Collect all locations for base relocations. for (Chunk *C : Sec->getChunks()) C->getBaserels(&V); // Add the addresses to .reloc section. if (!V.empty()) - addBaserelBlocks(Dest, V); + addBaserelBlocks(V); V.clear(); } } // Add addresses to .reloc section. Note that addresses are grouped by page. -void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V) { +void Writer::addBaserelBlocks(std::vector<Baserel> &V) { const uint32_t Mask = ~uint32_t(PageSize - 1); uint32_t Page = V[0].RVA & Mask; size_t I = 0, J = 1; @@ -961,11 +1282,11 @@ void Writer::addBaserelBlocks(OutputSection *Dest, std::vector<Baserel> &V) { uint32_t P = V[J].RVA & Mask; if (P == Page) continue; - Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); + RelocSec->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); I = J; Page = P; } if (I == J) return; - Dest->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); + RelocSec->addChunk(make<BaserelChunk>(Page, &V[I], &V[0] + J)); } diff --git a/COFF/Writer.h b/COFF/Writer.h index 21be1be6e92a..d37276cb6d91 100644 --- a/COFF/Writer.h +++ b/COFF/Writer.h @@ -13,6 +13,7 @@ #include "Chunks.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/COFF.h" +#include <chrono> #include <cstdint> #include <vector> @@ -29,16 +30,14 @@ void writeResult(); // non-overlapping file offsets and RVAs. class OutputSection { public: - OutputSection(llvm::StringRef N) : Name(N), Header({}) {} - void setRVA(uint64_t); - void setFileOffset(uint64_t); + OutputSection(llvm::StringRef N, uint32_t Chars) : Name(N) { + Header.Characteristics = Chars; + } void addChunk(Chunk *C); - llvm::StringRef getName() { return Name; } + void merge(OutputSection *Other); ArrayRef<Chunk *> getChunks() { return Chunks; } void addPermissions(uint32_t C); void setPermissions(uint32_t C); - uint32_t getPermissions() { return Header.Characteristics & PermMask; } - uint32_t getCharacteristics() { return Header.Characteristics; } uint64_t getRVA() { return Header.VirtualAddress; } uint64_t getFileOff() { return Header.PointerToRawData; } void writeHeaderTo(uint8_t *Buf); @@ -60,9 +59,10 @@ public: // N.B. The section index is one based. uint32_t SectionIndex = 0; -private: llvm::StringRef Name; - llvm::object::coff_section Header; + llvm::object::coff_section Header = {}; + +private: uint32_t StringTableOff = 0; std::vector<Chunk *> Chunks; }; |